Diffstat (limited to 'contrib/llvm/lib/Target/AArch64')
50 files changed, 21961 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm/lib/Target/AArch64/AArch64.h
new file mode 100644
index 0000000..4de4faa
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.h
@@ -0,0 +1,42 @@
+//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AArch64 back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_H
+#define LLVM_TARGET_AARCH64_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64AsmPrinter;
+class FunctionPass;
+class AArch64TargetMachine;
+class MachineInstr;
+class MCInst;
+
+FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
+                                   CodeGenOpt::Level OptLevel);
+
+FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
+
+FunctionPass *createAArch64BranchFixupPass();
+
+void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+                                      AArch64AsmPrinter &AP);
+
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
new file mode 100644
index 0000000..e17052b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -0,0 +1,70 @@
+//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AArch64 Subtarget features.
+// + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable Advanced SIMD instructions">; + +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable cryptographic instructions">; + +//===----------------------------------------------------------------------===// +// AArch64 Processors +// + +include "AArch64Schedule.td" + +def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "AArch64RegisterInfo.td" + +include "AArch64CallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "AArch64InstrInfo.td" + +def AArch64InstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// + +def A64InstPrinter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; + let AssemblyWriters = [A64InstPrinter]; +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp new file mode 100644 index 0000000..47ebb82 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -0,0 +1,347 @@ +//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format AArch64 assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "AArch64AsmPrinter.h" +#include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/DebugInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +MachineLocation +AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + // See emitFrameIndexDebugValue in InstrInfo for where this instruction is + // expected to be created. + assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg() + && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE"); + return MachineLocation(MI->getOperand(0).getReg(), + MI->getOperand(1).getImm()); +} + +/// Try to print a floating-point register as if it belonged to a specified +/// register-class. For example the inline asm operand modifier "b" requires its +/// argument to be printed as "bN". 
+static bool printModifiedFPRAsmOperand(const MachineOperand &MO, + const TargetRegisterInfo *TRI, + const TargetRegisterClass &RegClass, + raw_ostream &O) { + if (!MO.isReg()) + return true; + + for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { + if (RegClass.contains(*AR)) { + O << AArch64InstPrinter::getRegisterName(*AR); + return false; + } + } + return true; +} + +/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR +/// with the obvious type and an immediate 0 as either wzr or xzr. +static bool printModifiedGPRAsmOperand(const MachineOperand &MO, + const TargetRegisterInfo *TRI, + const TargetRegisterClass &RegClass, + raw_ostream &O) { + char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x'; + + if (MO.isImm() && MO.getImm() == 0) { + O << Prefix << "zr"; + return false; + } else if (MO.isReg()) { + if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) { + O << (Prefix == 'x' ? "sp" : "wsp"); + return false; + } + + for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { + if (RegClass.contains(*AR)) { + O << AArch64InstPrinter::getRegisterName(*AR); + return false; + } + } + } + + return true; +} + +bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO, + bool PrintImmediatePrefix, + StringRef Suffix, raw_ostream &O) { + StringRef Name; + StringRef Modifier; + switch (MO.getType()) { + default: + llvm_unreachable("Unexpected operand for symbolic address constraint"); + case MachineOperand::MO_GlobalAddress: + Name = Mang->getSymbol(MO.getGlobal())->getName(); + + // Global variables may be accessed either via a GOT or in various fun and + // interesting TLS-model specific ways. Set the prefix modifier as + // appropriate here. + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) { + Reloc::Model RelocM = TM.getRelocationModel(); + if (GV->isThreadLocal()) { + switch (TM.getTLSModel(GV)) { + case TLSModel::GeneralDynamic: + Modifier = "tlsdesc"; + break; + case TLSModel::LocalDynamic: + Modifier = "dtprel"; + break; + case TLSModel::InitialExec: + Modifier = "gottprel"; + break; + case TLSModel::LocalExec: + Modifier = "tprel"; + break; + } + } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + Modifier = "got"; + } + } + break; + case MachineOperand::MO_BlockAddress: + Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName(); + break; + case MachineOperand::MO_ExternalSymbol: + Name = MO.getSymbolName(); + break; + case MachineOperand::MO_ConstantPoolIndex: + Name = GetCPISymbol(MO.getIndex())->getName(); + break; + } + + // Some instructions (notably ADRP) don't take the # prefix for + // immediates. Only print it if asked to. + if (PrintImmediatePrefix) + O << '#'; + + // Only need the joining "_" if both the prefix and the suffix are + // non-null. This little block simply takes care of the four possibly + // combinations involved there. 
+ if (Modifier == "" && Suffix == "") + O << Name; + else if (Modifier == "" && Suffix != "") + O << ":" << Suffix << ':' << Name; + else if (Modifier != "" && Suffix == "") + O << ":" << Modifier << ':' << Name; + else + O << ":" << Modifier << '_' << Suffix << ':' << Name; + + return false; +} + +bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (!ExtraCode || !ExtraCode[0]) { + // There's actually no operand modifier, which leads to a slightly eclectic + // set of behaviour which we have to handle here. + const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + default: + llvm_unreachable("Unexpected operand for inline assembly"); + case MachineOperand::MO_Register: + // GCC prints the unmodified operand of a 'w' constraint as the vector + // register. Technically, we could allocate the argument as a VPR128, but + // that leads to extremely dodgy copies being generated to get the data + // there. + if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O)) + O << AArch64InstPrinter::getRegisterName(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + O << '#' << MO.getImm(); + break; + case MachineOperand::MO_FPImmediate: + assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected"); + O << "#0.0"; + break; + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + return printSymbolicAddress(MO, false, "", O); + } + return false; + } + + // We have a real modifier to handle. + switch(ExtraCode[0]) { + default: + // See if this is a generic operand + return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O); + case 'c': // Don't print "#" before an immediate operand. + if (!MI->getOperand(OpNum).isImm()) + return true; + O << MI->getOperand(OpNum).getImm(); + return false; + case 'w': + // Output 32-bit general register operand, constant zero as wzr, or stack + // pointer as wsp. Ignored when used with other operand types. + return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::GPR32RegClass, O); + case 'x': + // Output 64-bit general register operand, constant zero as xzr, or stack + // pointer as sp. Ignored when used with other operand types. + return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::GPR64RegClass, O); + case 'H': + // Output higher numbered of a 64-bit general register pair + case 'Q': + // Output least significant register of a 64-bit general register pair + case 'R': + // Output most significant register of a 64-bit general register pair + + // FIXME note: these three operand modifiers will require, to some extent, + // adding a paired GPR64 register class. Initial investigation suggests that + // assertions are hit unless it has a type and is made legal for that type + // in ISelLowering. After that step is made, the number of modifications + // needed explodes (operation legality, calling conventions, stores, reg + // copies ...). + llvm_unreachable("FIXME: Unimplemented register pairs"); + case 'b': + // Output 8-bit FP/SIMD scalar register operand, prefixed with b. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR8RegClass, O); + case 'h': + // Output 16-bit FP/SIMD scalar register operand, prefixed with h. 
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR16RegClass, O); + case 's': + // Output 32-bit FP/SIMD scalar register operand, prefixed with s. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR32RegClass, O); + case 'd': + // Output 64-bit FP/SIMD scalar register operand, prefixed with d. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR64RegClass, O); + case 'q': + // Output 128-bit FP/SIMD scalar register operand, prefixed with q. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR128RegClass, O); + case 'A': + // Output symbolic address with appropriate relocation modifier (also + // suitable for ADRP). + return printSymbolicAddress(MI->getOperand(OpNum), false, "", O); + case 'L': + // Output bits 11:0 of symbolic address with appropriate :lo12: relocation + // modifier. + return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O); + case 'G': + // Output bits 23:12 of symbolic address with appropriate :hi12: relocation + // modifier (currently only for TLS local exec). + return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O); + } + + +} + +bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + // Currently both the memory constraints (m and Q) behave the same and amount + // to the address as a single register. In future, we may allow "m" to provide + // both a base and an offset. + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isReg() && "unexpected inline assembly memory operand"); + O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']'; + return false; +} + +void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg()); + OS << '+' << MI->getOperand(1).getImm(); + OS << ']'; + OS << "+" << MI->getOperand(NOps - 2).getImm(); +} + + +#include "AArch64GenMCPseudoLowering.inc" + +void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(OutStreamer, MI)) + return; + + switch (MI->getOpcode()) { + case AArch64::DBG_VALUE: { + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + SmallString<128> TmpStr; + raw_svector_ostream OS(TmpStr); + PrintDebugValueComment(MI, OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + return; + } + } + + MCInst TmpInst; + LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this); + OutStreamer.EmitInstruction(TmpInst); +} + +void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { + if (Subtarget->isTargetELF()) { + const TargetLoweringObjectFileELF &TLOFELF = + static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. 
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const DataLayout *TD = TM.getDataLayout(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + OutStreamer.EmitLabel(Stubs[i].first); + OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), + TD->getPointerSize(0), 0); + } + Stubs.clear(); + } + } +} + +bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + return AsmPrinter::runOnMachineFunction(MF); +} + +// Force static initialization. +extern "C" void LLVMInitializeAArch64AsmPrinter() { + RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64Target); +} + diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h new file mode 100644 index 0000000..af0c9fe --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h @@ -0,0 +1,80 @@ +// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the AArch64 assembly printer class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64ASMPRINTER_H +#define LLVM_AARCH64ASMPRINTER_H + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class MCOperand; + +class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { + + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can + /// make the right decision when printing asm code for different targets. + const AArch64Subtarget *Subtarget; + + // emitPseudoExpansionLowering - tblgen'erated. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); + + public: + explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<AArch64Subtarget>(); + } + + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; + + MCOperand lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Sym) const; + + void EmitInstruction(const MachineInstr *MI); + void EmitEndOfAsmFile(Module &M); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + /// printSymbolicAddress - Given some kind of reasonably bare symbolic + /// reference, print out the appropriate asm string to represent it. If + /// appropriate, a relocation-specifier will be produced, composed of a + /// general class derived from the MO parameter and an instruction-specific + /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is + /// given. 
+ bool printSymbolicAddress(const MachineOperand &MO, + bool PrintImmediatePrefix, + StringRef Suffix, raw_ostream &O); + + MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + + virtual const char *getPassName() const { + return "AArch64 Assembly Printer"; + } + + virtual bool runOnMachineFunction(MachineFunction &MF); +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp new file mode 100644 index 0000000..71233ba --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp @@ -0,0 +1,600 @@ +//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that fixes AArch64 branches which have ended up out +// of range for their immediate operands. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-branch-fixup" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumSplit, "Number of uncond branches inserted"); +STATISTIC(NumCBrFixed, "Number of cond branches fixed"); + +/// Return the worst case padding that could result from unknown offset bits. +/// This does not include alignment padding caused by known offset bits. +/// +/// @param LogAlign log2(alignment) +/// @param KnownBits Number of known low offset bits. +static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { + if (KnownBits < LogAlign) + return (1u << LogAlign) - (1u << KnownBits); + return 0; +} + +namespace { + /// Due to limited PC-relative displacements, conditional branches to distant + /// blocks may need converting into an unconditional equivalent. For example: + /// tbz w1, #0, far_away + /// becomes + /// tbnz w1, #0, skip + /// b far_away + /// skip: + class AArch64BranchFixup : public MachineFunctionPass { + /// Information about the offset and size of a single basic block. + struct BasicBlockInfo { + /// Distance from the beginning of the function to the beginning of this + /// basic block. + /// + /// Offsets are computed assuming worst case padding before an aligned + /// block. This means that subtracting basic block offsets always gives a + /// conservative estimate of the real distance which may be smaller. + /// + /// Because worst case padding is used, the computed offset of an aligned + /// block may not actually be aligned. + unsigned Offset; + + /// Size of the basic block in bytes. If the block contains inline + /// assembly, this is a worst case estimate. + /// + /// The size does not include any alignment padding whether from the + /// beginning of the block, or from an aligned jump table at the end. + unsigned Size; + + /// The number of low bits in Offset that are known to be exact. The + /// remaining bits of Offset are an upper bound. 
+ uint8_t KnownBits; + + /// When non-zero, the block contains instructions (inline asm) of unknown + /// size. The real size may be smaller than Size bytes by a multiple of 1 + /// << Unalign. + uint8_t Unalign; + + BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {} + + /// Compute the number of known offset bits internally to this block. + /// This number should be used to predict worst case padding when + /// splitting the block. + unsigned internalKnownBits() const { + unsigned Bits = Unalign ? Unalign : KnownBits; + // If the block size isn't a multiple of the known bits, assume the + // worst case padding. + if (Size & ((1u << Bits) - 1)) + Bits = CountTrailingZeros_32(Size); + return Bits; + } + + /// Compute the offset immediately following this block. If LogAlign is + /// specified, return the offset the successor block will get if it has + /// this alignment. + unsigned postOffset(unsigned LogAlign = 0) const { + unsigned PO = Offset + Size; + if (!LogAlign) + return PO; + // Add alignment padding from the terminator. + return PO + UnknownPadding(LogAlign, internalKnownBits()); + } + + /// Compute the number of known low bits of postOffset. If this block + /// contains inline asm, the number of known bits drops to the + /// instruction alignment. An aligned terminator may increase the number + /// of know bits. + /// If LogAlign is given, also consider the alignment of the next block. + unsigned postKnownBits(unsigned LogAlign = 0) const { + return std::max(LogAlign, internalKnownBits()); + } + }; + + std::vector<BasicBlockInfo> BBInfo; + + /// One per immediate branch, keeping the machine instruction pointer, + /// conditional or unconditional, the max displacement, and (if IsCond is + /// true) the corresponding inverted branch opcode. + struct ImmBranch { + MachineInstr *MI; + unsigned OffsetBits : 31; + bool IsCond : 1; + ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond) + : MI(mi), OffsetBits(offsetbits), IsCond(cond) {} + }; + + /// Keep track of all the immediate branch instructions. 
+ /// + std::vector<ImmBranch> ImmBranches; + + MachineFunction *MF; + const AArch64InstrInfo *TII; + public: + static char ID; + AArch64BranchFixup() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "AArch64 branch fixup pass"; + } + + private: + void initializeFunctionInfo(); + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); + void adjustBBOffsetsAfter(MachineBasicBlock *BB); + bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, + unsigned OffsetBits); + bool fixupImmediateBr(ImmBranch &Br); + bool fixupConditionalBr(ImmBranch &Br); + + void computeBlockSize(MachineBasicBlock *MBB); + unsigned getOffsetOf(MachineInstr *MI) const; + void dumpBBs(); + void verify(); + }; + char AArch64BranchFixup::ID = 0; +} + +/// check BBOffsets +void AArch64BranchFixup::verify() { +#ifndef NDEBUG + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + unsigned MBBId = MBB->getNumber(); + assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); + } +#endif +} + +/// print block size and offset information - debugging +void AArch64BranchFixup::dumpBBs() { + DEBUG({ + for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { + const BasicBlockInfo &BBI = BBInfo[J]; + dbgs() << format("%08x BB#%u\t", BBI.Offset, J) + << " kb=" << unsigned(BBI.KnownBits) + << " ua=" << unsigned(BBI.Unalign) + << format(" size=%#x\n", BBInfo[J].Size); + } + }); +} + +/// Returns an instance of the branch fixup pass. +FunctionPass *llvm::createAArch64BranchFixupPass() { + return new AArch64BranchFixup(); +} + +bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + DEBUG(dbgs() << "***** AArch64BranchFixup ******"); + TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo(); + + // This pass invalidates liveness information when it splits basic blocks. + MF->getRegInfo().invalidateLiveness(); + + // Renumber all of the machine basic blocks in the function, guaranteeing that + // the numbers agree with the position of the block in the function. + MF->RenumberBlocks(); + + // Do the initial scan of the function, building up information about the + // sizes of each block and location of each immediate branch. + initializeFunctionInfo(); + + // Iteratively fix up branches until there is no change. + unsigned NoBRIters = 0; + bool MadeChange = false; + while (true) { + DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n'); + bool BRChange = false; + for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) + BRChange |= fixupImmediateBr(ImmBranches[i]); + if (BRChange && ++NoBRIters > 30) + report_fatal_error("Branch Fix Up pass failed to converge!"); + DEBUG(dumpBBs()); + + if (!BRChange) + break; + MadeChange = true; + } + + // After a while, this might be made debug-only, but it is not expensive. + verify(); + + DEBUG(dbgs() << '\n'; dumpBBs()); + + BBInfo.clear(); + ImmBranches.clear(); + + return MadeChange; +} + +/// Return true if the specified basic block can fallthrough into the block +/// immediately after it. +static bool BBHasFallthrough(MachineBasicBlock *MBB) { + // Get the next machine basic block in the function. + MachineFunction::iterator MBBI = MBB; + // Can't fall off end of function. 
+ if (llvm::next(MBBI) == MBB->getParent()->end()) + return false; + + MachineBasicBlock *NextBB = llvm::next(MBBI); + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I == NextBB) + return true; + + return false; +} + +/// Do the initial scan of the function, building up information about the sizes +/// of each block, and each immediate branch. +void AArch64BranchFixup::initializeFunctionInfo() { + BBInfo.clear(); + BBInfo.resize(MF->getNumBlockIDs()); + + // First thing, compute the size of all basic blocks, and see if the function + // has any inline assembly in it. If so, we have to be conservative about + // alignment assumptions, as we don't know for sure the size of any + // instructions in the inline assembly. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) + computeBlockSize(I); + + // The known bits of the entry block offset are determined by the function + // alignment. + BBInfo.front().KnownBits = MF->getAlignment(); + + // Compute block offsets and known bits. + adjustBBOffsetsAfter(MF->begin()); + + // Now go back through the instructions and build up our data structures. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + if (I->isDebugValue()) + continue; + + int Opc = I->getOpcode(); + if (I->isBranch()) { + bool IsCond = false; + + // The offsets encoded in instructions here scale by the instruction + // size (4 bytes), effectively increasing their range by 2 bits. + unsigned Bits = 0; + switch (Opc) { + default: + continue; // Ignore other JT branches + case AArch64::TBZxii: + case AArch64::TBZwii: + case AArch64::TBNZxii: + case AArch64::TBNZwii: + IsCond = true; + Bits = 14 + 2; + break; + case AArch64::Bcc: + case AArch64::CBZx: + case AArch64::CBZw: + case AArch64::CBNZx: + case AArch64::CBNZw: + IsCond = true; + Bits = 19 + 2; + break; + case AArch64::Bimm: + Bits = 26 + 2; + break; + } + + // Record this immediate branch. + ImmBranches.push_back(ImmBranch(I, Bits, IsCond)); + } + } + } +} + +/// Compute the size and some alignment information for MBB. This function +/// updates BBInfo directly. +void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) { + BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; + BBI.Size = 0; + BBI.Unalign = 0; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + BBI.Size += TII->getInstSizeInBytes(*I); + // For inline asm, GetInstSizeInBytes returns a conservative estimate. + // The actual size may be smaller, but still a multiple of the instr size. + if (I->isInlineAsm()) + BBI.Unalign = 2; + } +} + +/// Return the current offset of the specified machine instruction from the +/// start of the function. This offset changes as stuff is moved around inside +/// the function. +unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const { + MachineBasicBlock *MBB = MI->getParent(); + + // The offset is composed of two things: the sum of the sizes of all MBB's + // before this instruction's block, and the offset from the start of the block + // it is in. + unsigned Offset = BBInfo[MBB->getNumber()].Offset; + + // Sum instructions before MI in MBB. 
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { + assert(I != MBB->end() && "Didn't find MI in its own basic block?"); + Offset += TII->getInstSizeInBytes(*I); + } + return Offset; +} + +/// Split the basic block containing MI into two blocks, which are joined by +/// an unconditional branch. Update data structures and renumber blocks to +/// account for this change and returns the newly created block. +MachineBasicBlock * +AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) { + MachineBasicBlock *OrigBB = MI->getParent(); + + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MachineFunction::iterator MBBI = OrigBB; ++MBBI; + MF->insert(MBBI, NewBB); + + // Splice the instructions starting with MI over to NewBB. + NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); + + // Add an unconditional branch from OrigBB to NewBB. + // Note the new unconditional branch is not being recorded. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond to anything in the source. + BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB); + ++NumSplit; + + // Update the CFG. All succs of OrigBB are now succs of NewBB. + NewBB->transferSuccessors(OrigBB); + + // OrigBB branches to NewBB. + OrigBB->addSuccessor(NewBB); + + // Update internal data structures to account for the newly inserted MBB. + MF->RenumberBlocks(NewBB); + + // Insert an entry into BBInfo to align it properly with the (newly + // renumbered) block numbers. + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + // Figure out how large the OrigBB is. As the first half of the original + // block, it cannot contain a tablejump. The size includes + // the new jump we added. (It should be possible to do this without + // recounting everything, but it's very confusing, and this is rarely + // executed.) + computeBlockSize(OrigBB); + + // Figure out how large the NewMBB is. As the second half of the original + // block, it may contain a tablejump. + computeBlockSize(NewBB); + + // All BBOffsets following these blocks must be modified. + adjustBBOffsetsAfter(OrigBB); + + return NewBB; +} + +void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) { + unsigned BBNum = BB->getNumber(); + for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { + // Get the offset and known bits at the end of the layout predecessor. + // Include the alignment of the current block. + unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); + unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); + unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); + + // This is where block i begins. Stop if the offset is already correct, + // and we have updated 2 blocks. This is the maximum number of blocks + // changed before calling this function. + if (i > BBNum + 2 && + BBInfo[i].Offset == Offset && + BBInfo[i].KnownBits == KnownBits) + break; + + BBInfo[i].Offset = Offset; + BBInfo[i].KnownBits = KnownBits; + } +} + +/// Returns true if the distance between specific MI and specific BB can fit in +/// MI's displacement field. 
+bool AArch64BranchFixup::isBBInRange(MachineInstr *MI, + MachineBasicBlock *DestBB, + unsigned OffsetBits) { + int64_t BrOffset = getOffsetOf(MI); + int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset; + + DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() + << " from BB#" << MI->getParent()->getNumber() + << " bits available=" << OffsetBits + << " from " << getOffsetOf(MI) << " to " << DestOffset + << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); + + return isIntN(OffsetBits, DestOffset - BrOffset); +} + +/// Fix up an immediate branch whose destination is too far away to fit in its +/// displacement field. +bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *DestBB = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).isMBB()) { + DestBB = MI->getOperand(i).getMBB(); + break; + } + } + assert(DestBB && "Branch with no destination BB?"); + + // Check to see if the DestBB is already in-range. + if (isBBInRange(MI, DestBB, Br.OffsetBits)) + return false; + + assert(Br.IsCond && "Only conditional branches should need fixup"); + return fixupConditionalBr(Br); +} + +/// Fix up a conditional branch whose destination is too far away to fit in its +/// displacement field. It is converted to an inverse conditional branch + an +/// unconditional branch to the destination. +bool +AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *MBB = MI->getParent(); + unsigned CondBrMBBOperand = 0; + + // The general idea is to add an unconditional branch to the destination and + // invert the conditional branch to jump over it. Complications occur around + // fallthrough and unreachable ends to the block. + // b.lt L1 + // => + // b.ge L2 + // b L1 + // L2: + + // First we invert the conditional branch, by creating a replacement if + // necessary. This if statement contains all the special handling of different + // branch types. + if (MI->getOpcode() == AArch64::Bcc) { + // The basic block is operand number 1 for Bcc + CondBrMBBOperand = 1; + + A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm(); + CC = A64InvertCondCode(CC); + MI->getOperand(0).setImm(CC); + } else { + MachineInstrBuilder InvertedMI; + int InvertedOpcode; + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown branch type"); + case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break; + case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break; + case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break; + case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break; + case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break; + case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break; + case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break; + case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break; + } + + InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode)); + for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) { + InvertedMI.addOperand(MI->getOperand(i)); + if (MI->getOperand(i).isMBB()) + CondBrMBBOperand = i; + } + + MI->eraseFromParent(); + MI = Br.MI = InvertedMI; + } + + // If the branch is at the end of its MBB and that has a fall-through block, + // direct the updated conditional branch to the fall-through + // block. Otherwise, split the MBB before the next instruction. 
+ MachineInstr *BMI = &MBB->back(); + bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); + + ++NumCBrFixed; + if (BMI != MI) { + if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && + BMI->getOpcode() == AArch64::Bimm) { + // Last MI in the BB is an unconditional branch. We can swap destinations: + // b.eq L1 (temporarily b.ne L1 after first change) + // b L2 + // => + // b.ne L2 + // b L1 + MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); + if (isBBInRange(MI, NewDest, Br.OffsetBits)) { + DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " + << *BMI); + MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB(); + BMI->getOperand(0).setMBB(DestBB); + MI->getOperand(CondBrMBBOperand).setMBB(NewDest); + return true; + } + } + } + + if (NeedSplit) { + MachineBasicBlock::iterator MBBI = MI; ++MBBI; + splitBlockBeforeInstr(MBBI); + // No need for the branch to the next block. We're adding an unconditional + // branch to the destination. + int delta = TII->getInstSizeInBytes(MBB->back()); + BBInfo[MBB->getNumber()].Size -= delta; + MBB->back().eraseFromParent(); + // BBInfo[SplitBB].Offset is wrong temporarily, fixed below + } + + // After splitting and removing the unconditional branch from the original BB, + // the structure is now: + // oldbb: + // [things] + // b.invertedCC L1 + // splitbb/fallthroughbb: + // [old b L2/real continuation] + // + // We now have to change the conditional branch to point to splitbb and add an + // unconditional branch after it to L1, giving the final structure: + // oldbb: + // [things] + // b.invertedCC splitbb + // b L1 + // splitbb/fallthroughbb: + // [old b L2/real continuation] + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + + DEBUG(dbgs() << " Insert B to BB#" + << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber() + << " also invert condition and change dest. to BB#" + << NextBB->getNumber() << "\n"); + + // Insert a new unconditional branch and fixup the destination of the + // conditional one. Also update the ImmBranch as well as adding a new entry + // for the new branch. + BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm)) + .addMBB(MI->getOperand(CondBrMBBOperand).getMBB()); + MI->getOperand(CondBrMBBOperand).setMBB(NextBB); + + BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); + + // 26 bits written down in Bimm, specifying a multiple of 4. + unsigned OffsetBits = 26 + 2; + ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false)); + + adjustBBOffsetsAfter(MBB); + return true; +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td new file mode 100644 index 0000000..b880d83 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td @@ -0,0 +1,196 @@ +//==-- AArch64CallingConv.td - Calling Conventions for ARM ----*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for AArch64 architecture. +//===----------------------------------------------------------------------===// + + +// The AArch64 Procedure Call Standard is unfortunately specified at a slightly +// higher level of abstraction than LLVM's target interface presents. 
In +// particular, it refers (like other ABIs, in fact) directly to +// structs. However, generic LLVM code takes the liberty of lowering structure +// arguments to the component fields before we see them. +// +// As a result, the obvious direct map from LLVM IR to PCS concepts can't be +// implemented, so the goals of this calling convention are, in decreasing +// priority order: +// 1. Expose *some* way to express the concepts required to implement the +// generic PCS from a front-end. +// 2. Provide a sane ABI for pure LLVM. +// 3. Follow the generic PCS as closely as is naturally possible. +// +// The suggested front-end implementation of PCS features is: +// * Integer, float and vector arguments of all sizes which end up in +// registers are passed and returned via the natural LLVM type. +// * Structure arguments with size <= 16 bytes are passed and returned in +// registers as similar integer or composite types. For example: +// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed). +// * HFAs in registers follow rules similar to small structs: appropriate +// composite types. +// * Structure arguments with size > 16 bytes are passed via a pointer, +// handled completely by the front-end. +// * Structure return values > 16 bytes via an sret pointer argument. +// * Other stack-based arguments (not large structs) are passed using byval +// pointers. Padding arguments are added beforehand to guarantee a large +// struct doesn't later use integer registers. +// +// N.b. this means that it is the front-end's responsibility (if it cares about +// PCS compliance) to check whether enough registers are available for an +// argument when deciding how to pass it. + +class CCIfAlign<int Align, CCAction A>: + CCIf<"ArgFlags.getOrigAlign() == " # Align, A>; + +def CC_A64_APCS : CallingConv<[ + // SRet is an LLVM-specific concept, so it takes precedence over general ABI + // concerns. However, this rule will be used by C/C++ frontends to implement + // structure return. + CCIfSRet<CCAssignToReg<[X8]>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal<CCPassByVal<8, 8>>, + + // Canonicalise the various types that live in different floating-point + // registers. This makes sense because the PCS does not distinguish Short + // Vectors and Floating-point types. + CCIfType<[v2i8], CCBitConvertToType<f16>>, + CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>, + CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCBitConvertToType<f128>>, + + // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision + // Floating-point or Short Vector Type and the NSRN is less than 8, then the + // argument is allocated to the least significant bits of register + // v[NSRN]. The NSRN is incremented by one. The argument has now been + // allocated." + CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated + // SIMD and Floating-point registers (NSRN - number of elements < 8), then the + // argument is allocated to SIMD and Floating-point registers (with one + // register per element of the HFA). The NSRN is incremented by the number of + // registers used. 
The argument has now been allocated." + // + // N.b. As above, this rule is the responsibility of the front-end. + + // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of + // the argument is rounded up to the nearest multiple of 8 bytes." + // + // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short + // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural + // Alignment of the Argument's type." + // + // It is expected that these will be satisfied by adding dummy arguments to + // the prototype. + + // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point + // type then the size of the argument is set to 8 bytes. The effect is as if + // the argument had been copied to the least significant bits of a 64-bit + // register and the remaining bits filled with unspecified values." + CCIfType<[f16, f32], CCPromoteToType<f64>>, + + // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad- + // precision Floating-point or Short Vector Type, then the argument is copied + // to memory at the adjusted NSAA. The NSAA is incremented by the size of the + // argument. The argument has now been allocated." + CCIfType<[f64], CCAssignToStack<8, 8>>, + CCIfType<[f128], CCAssignToStack<16, 16>>, + + // PCS: "C.7: If the argument is an Integral Type, the size of the argument is + // less than or equal to 8 bytes and the NGRN is less than 8, the argument is + // copied to the least significant bits of x[NGRN]. The NGRN is incremented by + // one. The argument has now been allocated." + + // First we implement C.8 and C.9 (128-bit types get even registers). i128 is + // represented as two i64s, the first one being split. If we delayed this + // operation C.8 would never be reached. + CCIfType<[i64], + CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>, + + // Note: the promotion also implements C.14. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // And now the real implementation of C.7 + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + + // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded + // up to the next even number." + // + // "C.9: If the argument is an Integral Type, the size of the argument is + // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN] + // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the + // memory representation of the argument. The NGRN is incremented by two. The + // argument has now been allocated." + // + // Subtlety here: what if alignment is 16 but it is not an integral type? All + // floating-point types have been allocated already, which leaves composite + // types: this is why a front-end may need to produce i128 for a struct <= 16 + // bytes. + + // PCS: "C.10 If the argument is a Composite Type and the size in double-words + // of the argument is not more than 8 minus NGRN, then the argument is copied + // into consecutive general-purpose registers, starting at x[NGRN]. The + // argument is passed as though it had been loaded into the registers from a + // double-word aligned address with an appropriate sequence of LDR + // instructions loading consecutive registers from memory (the contents of any + // unused parts of the registers are unspecified by this standard). The NGRN + // is incremented by the number of registers used. The argument has now been + // allocated." + // + // Another one that's the responsibility of the front-end (sigh). 
+ + // PCS: "C.11: The NGRN is set to 8." + CCCustom<"CC_AArch64NoMoreRegs">, + + // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural + // Alignment of the argument's type." + // + // PCS: "C.13: If the argument is a composite type then the argument is copied + // to memory at the adjusted NSAA. The NSAA is by the size of the + // argument. The argument has now been allocated." + // + // Note that the effect of this corresponds to a memcpy rather than register + // stores so that the struct ends up correctly addressable at the adjusted + // NSAA. + + // PCS: "C.14: If the size of the argument is less than 8 bytes then the size + // of the argument is set to 8 bytes. The effect is as if the argument was + // copied to the least significant bits of a 64-bit register and the remaining + // bits filled with unspecified values." + // + // Integer types were widened above. Floating-point and composite types have + // already been allocated completely. Nothing to do. + + // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA + // is incremented by the size of the argument. The argument has now been + // allocated." + CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>, + CCIfType<[i64], CCAssignToStack<8, 8>> + +]>; + +// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits +// of vector registers (8-15) are callee-saved. The order here is is picked up +// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of +// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at +// [sp-16], ... +def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19), + (sequence "D%u", 15, 8))>; + + +// TLS descriptor calls are extremely restricted in their changes, to allow +// optimisations in the (hopefully) more common fast path where no real action +// is needed. They actually have to preserve all registers, except for the +// unavoidable X30 and the return register X0. +def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1), + (sequence "Q%u", 31, 0))>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp new file mode 100644 index 0000000..dc41f2f --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -0,0 +1,633 @@ +//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of TargetFrameLowering class. 
+// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64FrameLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64InstrInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +void AArch64FrameLowering::splitSPAdjustments(uint64_t Total, + uint64_t &Initial, + uint64_t &Residual) const { + // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP + // instructions have a 7-bit signed immediate scaled by 8, giving a reach of + // 0x1f8, but stack adjustment should always be a multiple of 16. + if (Total <= 0x1f0) { + Initial = Total; + Residual = 0; + } else { + Initial = 0x1f0; + Residual = Total - Initial; + } +} + +void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo<AArch64MachineFunctionInfo>(); + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + MachineModuleInfo &MMI = MF.getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + bool NeedsFrameMoves = MMI.hasDebugInfo() + || MF.getFunction()->needsUnwindTableEntry(); + + uint64_t NumInitialBytes, NumResidualBytes; + + // Currently we expect the stack to be laid out by + // sub sp, sp, #initial + // stp x29, x30, [sp, #offset] + // ... + // str xxx, [sp, #offset] + // sub sp, sp, #rest (possibly via extra instructions). + if (MFI->getCalleeSavedInfo().size()) { + // If there are callee-saved registers, we want to store them efficiently as + // a block, and virtual base assignment happens too early to do it for us so + // we adjust the stack in two phases: first just for callee-saved fiddling, + // then to allocate the rest of the frame. + splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes); + } else { + // If there aren't any callee-saved registers, two-phase adjustment is + // inefficient. It's more efficient to adjust with NumInitialBytes too + // because when we're in a "callee pops argument space" situation, that pop + // must be tacked onto Initial for correctness. + NumInitialBytes = MFI->getStackSize(); + NumResidualBytes = 0; + } + + // Tell everyone else how much adjustment we're expecting them to use. In + // particular if an adjustment is required for a tail call the epilogue could + // have a different view of things. + FuncInfo->setInitialStackAdjust(NumInitialBytes); + + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes, + MachineInstr::FrameSetup); + + if (NeedsFrameMoves && NumInitialBytes) { + // We emit this update even if the CFA is set from a frame pointer later so + // that the CFA is valid in the interim. 
+ MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(SPLabel); + + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, NumInitialBytes); + Moves.push_back(MachineMove(SPLabel, Dst, Src)); + } + + // Otherwise we need to set the frame pointer and/or add a second stack + // adjustment. + + bool FPNeedsSetting = hasFP(MF); + for (; MBBI != MBB.end(); ++MBBI) { + // Note that this search makes strong assumptions about the operation used + // to store the frame-pointer: it must be "STP x29, x30, ...". This could + // change in future, but until then there's no point in implementing + // untestable more generic cases. + if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR + && MBBI->getOperand(0).getReg() == AArch64::X29) { + int64_t X29FrameIdx = MBBI->getOperand(2).getIndex(); + FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx)); + + ++MBBI; + emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP, + AArch64::X29, + NumInitialBytes + MFI->getObjectOffset(X29FrameIdx), + MachineInstr::FrameSetup); + + // The offset adjustment used when emitting debugging locations relative + // to whatever frame base is set. AArch64 uses the default frame base (FP + // or SP) and this adjusts the calculations to be correct. + MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx) + - MFI->getStackSize()); + + if (NeedsFrameMoves) { + MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(FPLabel); + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx)); + Moves.push_back(MachineMove(FPLabel, Dst, Src)); + } + + FPNeedsSetting = false; + } + + if (!MBBI->getFlag(MachineInstr::FrameSetup)) + break; + } + + assert(!FPNeedsSetting && "Frame pointer couldn't be set"); + + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes, + MachineInstr::FrameSetup); + + // Now we emit the rest of the frame setup information, if necessary: we've + // already noted the FP and initial SP moves so we're left with the prologue's + // final SP update and callee-saved register locations. + if (!NeedsFrameMoves) + return; + + // Reuse the label if appropriate, so create it in this outer scope. + MCSymbol *CSLabel = 0; + + // The rest of the stack adjustment + if (!hasFP(MF) && NumResidualBytes) { + CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(CSLabel); + + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes); + Moves.push_back(MachineMove(CSLabel, Dst, Src)); + } + + // And any callee-saved registers (it's fine to leave them to the end here, + // because the old values are still valid at this point. 
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + if (CSI.size()) { + if (!CSLabel) { + CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(CSLabel); + } + + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + MachineLocation Dst(MachineLocation::VirtualFP, + MFI->getObjectOffset(I->getFrameIdx())); + MachineLocation Src(I->getReg()); + Moves.push_back(MachineMove(CSLabel, Dst, Src)); + } + } +} + +void +AArch64FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo<AArch64MachineFunctionInfo>(); + + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + DebugLoc DL = MBBI->getDebugLoc(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned RetOpcode = MBBI->getOpcode(); + + // Initial and residual are named for consitency with the prologue. Note that + // in the epilogue, the residual adjustment is executed first. + uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust(); + uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes; + uint64_t ArgumentPopSize = 0; + if (RetOpcode == AArch64::TC_RETURNdi || + RetOpcode == AArch64::TC_RETURNxi) { + MachineOperand &JumpTarget = MBBI->getOperand(0); + MachineOperand &StackAdjust = MBBI->getOperand(1); + + MachineInstrBuilder MIB; + if (RetOpcode == AArch64::TC_RETURNdi) { + MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm)); + if (JumpTarget.isGlobal()) { + MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else { + assert(JumpTarget.isSymbol() && "unexpected tail call destination"); + MIB.addExternalSymbol(JumpTarget.getSymbolName(), + JumpTarget.getTargetFlags()); + } + } else { + assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg() + && "Unexpected tail call"); + + MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx)); + MIB.addReg(JumpTarget.getReg(), RegState::Kill); + } + + // Add the extra operands onto the new tail call instruction even though + // they're not used directly (so that liveness is tracked properly etc). + for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) + MIB->addOperand(MBBI->getOperand(i)); + + + // Delete the pseudo instruction TC_RETURN. + MachineInstr *NewMI = prior(MBBI); + MBB.erase(MBBI); + MBBI = NewMI; + + // For a tail-call in a callee-pops-arguments environment, some or all of + // the stack may actually be in use for the call's arguments, this is + // calculated during LowerCall and consumed here... + ArgumentPopSize = StackAdjust.getImm(); + } else { + // ... otherwise the amount to pop is *all* of the argument space, + // conveniently stored in the MachineFunctionInfo by + // LowerFormalArguments. This will, of course, be zero for the C calling + // convention. + ArgumentPopSize = FuncInfo->getArgumentStackToRestore(); + } + + assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0 + && "refusing to adjust stack by misaligned amt"); + + // We may need to address callee-saved registers differently, so find out the + // bound on the frame indices. 
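+  // For illustration (indices assumed): if the callee-saved slots were given
+  // frame indices -4 .. -1, MinCSFI/MaxCSFI below become -4 and -1, and only
+  // frame-index operands inside that range count as callee-save restores.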
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The "residual" stack update comes first from this direction and guarantees + // that SP is NumInitialBytes below its value on function entry, either by a + // direct update or restoring it from the frame pointer. + if (NumInitialBytes + ArgumentPopSize != 0) { + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, + NumInitialBytes + ArgumentPopSize); + --MBBI; + } + + + // MBBI now points to the instruction just past the last callee-saved + // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp" + // otherwise). + + // Now we need to find out where to put the bulk of the stack adjustment + MachineBasicBlock::iterator FirstEpilogue = MBBI; + while (MBBI != MBB.begin()) { + --MBBI; + + unsigned FrameOp; + for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) { + if (MBBI->getOperand(FrameOp).isFI()) + break; + } + + // If this instruction doesn't have a frame index we've reached the end of + // the callee-save restoration. + if (FrameOp == MBBI->getNumOperands()) + break; + + // Likewise if it *is* a local reference, but not to a callee-saved object. + int FrameIdx = MBBI->getOperand(FrameOp).getIndex(); + if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI) + break; + + FirstEpilogue = MBBI; + } + + if (MF.getFrameInfo()->hasVarSizedObjects()) { + int64_t StaticFrameBase; + StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset()); + emitRegUpdate(MBB, FirstEpilogue, DL, TII, + AArch64::XSP, AArch64::X29, AArch64::NoRegister, + StaticFrameBase); + } else { + emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes); + } +} + +int64_t +AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, + int FrameIndex, + unsigned &FrameReg, + int SPAdj, + bool IsCalleeSaveOp) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo<AArch64MachineFunctionInfo>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex); + + assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0) + && "callee-saved register in unexpected place"); + + // If the frame for this function is particularly large, we adjust the stack + // in two phases which means the callee-save related operations see a + // different (intermediate) stack size. + int64_t FrameRegPos; + if (IsCalleeSaveOp) { + FrameReg = AArch64::XSP; + FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust()); + } else if (useFPForAddressing(MF)) { + // Have to use the frame pointer since we have no idea where SP is. 
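+    // Illustrative arithmetic (values assumed): with a frame-pointer offset
+    // of -16 and an object at offset -32 from the top of the frame, the
+    // result is -32 - (-16) = -16, i.e. the object is addressed as
+    // [x29, #-16].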
+    FrameReg = AArch64::X29;
+    FrameRegPos = FuncInfo->getFramePointerOffset();
+  } else {
+    FrameReg = AArch64::XSP;
+    FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
+  }
+
+  return TopOfFrameOffset - FrameRegPos;
+}
+
+void
+AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                       RegScavenger *RS) const {
+  const AArch64RegisterInfo *RegInfo =
+    static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const AArch64InstrInfo &TII =
+    *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+
+  if (hasFP(MF)) {
+    MF.getRegInfo().setPhysRegUsed(AArch64::X29);
+    MF.getRegInfo().setPhysRegUsed(AArch64::X30);
+  }
+
+  // If addressing of local variables is going to be more complicated than
+  // shoving a base register and an offset into the instruction, then we may
+  // well need to scavenge registers. We should either specifically add a
+  // callee-save register for this purpose or allocate an extra spill slot.
+
+  bool BigStack =
+    (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+    || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
+    || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
+
+  if (!BigStack)
+    return;
+
+  // We certainly need some slack space for the scavenger, preferably an extra
+  // register.
+  const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+  uint16_t ExtraReg = AArch64::NoRegister;
+
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
+        !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
+      ExtraReg = CSRegs[i];
+      break;
+    }
+  }
+
+  if (ExtraReg != 0) {
+    MF.getRegInfo().setPhysRegUsed(ExtraReg);
+  } else {
+    // Create a stack slot for scavenging purposes. PrologEpilogInserter
+    // helpfully places it near either SP or FP for us to avoid
+    // infinite regression during scavenging.
+    const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
+    RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment(),
+                                                       false));
+  }
+}
+
+bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
+                                                  unsigned Reg) const {
+  // If @llvm.returnaddress is called then it will refer to X30 by some means;
+  // the prologue store does not kill the register.
+  if (Reg == AArch64::X30) {
+    if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
+        && MBB.getParent()->getRegInfo().isLiveIn(Reg))
+      return false;
+  }
+
+  // In all other cases, physical registers are dead after they've been saved
+  // but live at the beginning of the prologue block.
+  MBB.addLiveIn(Reg);
+  return true;
+}
+
+void
+AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MBBI,
+                                      const std::vector<CalleeSavedInfo> &CSI,
+                                      const TargetRegisterInfo *TRI,
+                                      LoadStoreMethod PossClasses[],
+                                      unsigned NumClasses) const {
+  DebugLoc DL = MBB.findDebugLoc(MBBI);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  // A certain amount of implicit contract is present here. The actual stack
+  // offsets haven't been allocated officially yet, so for strictly correct code
+  // we rely on the fact that the elements of CSI are allocated in order
+  // starting at SP, purely as dictated by size and alignment. In practice since
+  // this function handles the only accesses to those slots it's not quite so
+  // important.
+  //
+  // We have also ordered the Callee-saved register list in AArch64CallingConv
+  // so that the above scheme puts registers in order: in particular we want
+  // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
+  for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+
+    // First we need to find out which register class the register belongs to
+    // so that we can use the correct load/store instructions.
+    unsigned ClassIdx;
+    for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
+      if (PossClasses[ClassIdx].RegClass->contains(Reg))
+        break;
+    }
+    assert(ClassIdx != NumClasses
+           && "Asked to store register in unexpected class");
+    const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
+
+    // Now we need to decide whether it's possible to emit a paired instruction:
+    // for this we want the next register to be in the same class.
+    MachineInstrBuilder NewMI;
+    bool Pair = false;
+    if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
+      Pair = true;
+      unsigned StLow = 0, StHigh = 0;
+      if (isPrologue) {
+        // Most of these registers will be live-in to the MBB and killed by our
+        // store, though there are exceptions (see determinePrologueDeath).
+        StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
+        StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+      } else {
+        StLow = RegState::Define;
+        StHigh = RegState::Define;
+      }
+
+      NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
+                .addReg(CSI[i+1].getReg(), StLow)
+                .addReg(CSI[i].getReg(), StHigh);
+
+      // If it's a paired op, we've consumed two registers.
+      ++i;
+    } else {
+      unsigned State;
+      if (isPrologue) {
+        State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+      } else {
+        State = RegState::Define;
+      }
+
+      NewMI = BuildMI(MBB, MBBI, DL,
+                      TII.get(PossClasses[ClassIdx].SingleOpcode))
+                .addReg(CSI[i].getReg(), State);
+    }
+
+    // Note that the FrameIdx refers to the second register in a pair: it will
+    // be allocated the smaller numeric address and so is the one an LDP/STP
+    // address must use.
+    int FrameIdx = CSI[i].getFrameIdx();
+    MachineMemOperand::MemOperandFlags Flags;
+    Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+    MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+                              Flags,
+                              Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
+                              MFI.getObjectAlignment(FrameIdx));
+
+    NewMI.addFrameIndex(FrameIdx)
+         .addImm(0)                  // address-register offset
+         .addMemOperand(MMO);
+
+    if (isPrologue)
+      NewMI.setMIFlags(MachineInstr::FrameSetup);
+
+    // For aesthetic reasons, during an epilogue we want to emit complementary
+    // operations to the prologue, but in the opposite order. So we still
+    // iterate through the CalleeSavedInfo list in order, but we put the
+    // instructions successively earlier in the MBB.
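+    // E.g. a prologue that emits "stp A; stp B" is mirrored by an epilogue
+    // that reads "ldp B; ldp A", because each restore is inserted before the
+    // one emitted previously (registers named only for illustration).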
+ if (!isPrologue) + --MBBI; + } +} + +bool +AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + static LoadStoreMethod PossibleClasses[] = { + {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR}, + {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR}, + }; + unsigned NumClasses = llvm::array_lengthof(PossibleClasses); + + emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI, + PossibleClasses, NumClasses); + + return true; +} + +bool +AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + + if (CSI.empty()) + return false; + + static LoadStoreMethod PossibleClasses[] = { + {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR}, + {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR}, + }; + unsigned NumClasses = llvm::array_lengthof(PossibleClasses); + + emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI, + PossibleClasses, NumClasses); + + return true; +} + +bool +AArch64FrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); + + // This is a decision of ABI compliance. The AArch64 PCS gives various options + // for conformance, and even at the most stringent level more or less permits + // elimination for leaf functions because there's no loss of functionality + // (for debugging etc).. + if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls()) + return true; + + // The following are hard-limits: incorrect code will be generated if we try + // to omit the frame. + return (RI->needsStackRealignment(MF) || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken()); +} + +bool +AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const { + return MF.getFrameInfo()->hasVarSizedObjects(); +} + +bool +AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Of the various reasons for having a frame pointer, it's actually only + // variable-sized objects that prevent reservation of a call frame. + return !(hasFP(MF) && MFI->hasVarSizedObjects()); +} + +void +AArch64FrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + const AArch64InstrInfo &TII = + *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MI->getDebugLoc(); + int Opcode = MI->getOpcode(); + bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode(); + uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0; + + if (!hasReservedCallFrame(MF)) { + unsigned Align = getStackAlignment(); + + int64_t Amount = MI->getOperand(0).getImm(); + Amount = RoundUpToAlignment(Amount, Align); + if (!IsDestroy) Amount = -Amount; + + // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it + // doesn't have to pop anything), then the first operand will be zero too so + // this adjustment is a no-op. + if (CalleePopAmount == 0) { + // FIXME: in-function stack adjustment for calls is limited to 12-bits + // because there's no guaranteed temporary register available. 
Mostly call + // frames will be allocated at the start of a function so this is OK, but + // it is a limitation that needs dealing with. + assert(Amount > -0xfff && Amount < 0xfff && "call frame too large"); + emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount); + } + } else if (CalleePopAmount != 0) { + // If the calling convention demands that the callee pops arguments from the + // stack, we want to add it back if we have a reserved call frame. + assert(CalleePopAmount < 0xfff && "call frame too large"); + emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount); + } + + MBB.erase(MI); +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h new file mode 100644 index 0000000..45ea0ec --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -0,0 +1,108 @@ +//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the AArch64-specific parts of the TargetFrameLowering +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_FRAMEINFO_H +#define LLVM_AARCH64_FRAMEINFO_H + +#include "AArch64Subtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { +class AArch64Subtarget; + +class AArch64FrameLowering : public TargetFrameLowering { +private: + // In order to unify the spilling and restoring of callee-saved registers into + // emitFrameMemOps, we need to be able to specify which instructions to use + // for the relevant memory operations on each register class. An array of the + // following struct is populated and passed in to achieve this. + struct LoadStoreMethod { + const TargetRegisterClass *RegClass; // E.g. GPR64RegClass + + // The preferred instruction. + unsigned PairOpcode; // E.g. LSPair64_STR + + // Sometimes only a single register can be handled at once. + unsigned SingleOpcode; // E.g. LS64_STR + }; +protected: + const AArch64Subtarget &STI; + +public: + explicit AArch64FrameLowering(const AArch64Subtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16), + STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + virtual void emitPrologue(MachineFunction &MF) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + /// Decides how much stack adjustment to perform in each phase of the prologue + /// and epilogue. 
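+  /// For instance (illustrative numbers): a 1024-byte frame comes back as
+  /// Initial == 0x1f0 and Residual == 0x210, keeping the callee-save STP/LDP
+  /// offsets within reach of their scaled 7-bit immediates.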
+  void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
+                          uint64_t &Residual) const;
+
+  int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
+                                     unsigned &FrameReg, int SPAdj,
+                                     bool IsCalleeSaveOp) const;
+
+  virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                    RegScavenger *RS) const;
+
+  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const;
+  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MI) const;
+
+  /// If the register is X30 (i.e. LR) and the return address is used in the
+  /// function then the callee-save store doesn't actually kill the register,
+  /// otherwise it does.
+  bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
+
+  /// This function emits the loads or stores required during prologue and
+  /// epilogue as efficiently as possible.
+  ///
+  /// The operations involved in setting up and tearing down the frame are
+  /// similar enough to warrant a shared function, particularly as
+  /// discrepancies between the two would be disastrous.
+  void emitFrameMemOps(bool isStore, MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator MI,
+                       const std::vector<CalleeSavedInfo> &CSI,
+                       const TargetRegisterInfo *TRI,
+                       LoadStoreMethod PossibleClasses[],
+                       unsigned NumClasses) const;
+
+  virtual bool hasFP(const MachineFunction &MF) const;
+
+  virtual bool useFPForAddressing(const MachineFunction &MF) const;
+
+  /// On AArch64 a call frame can be reserved unless the function uses
+  /// variable-sized stack objects, which force all local addressing through
+  /// the frame pointer.
+  virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
new file mode 100644
index 0000000..46b8221
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -0,0 +1,415 @@
+//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===--------------------------------------------------------------------===//
+/// AArch64 specific code to select AArch64 machine instructions for
+/// SelectionDAG operations.
+///
+namespace {
+
+class AArch64DAGToDAGISel : public SelectionDAGISel {
+  AArch64TargetMachine &TM;
+  const AArch64InstrInfo *TII;
+
+  /// Keep a pointer to the AArch64Subtarget around so that we can
+  /// make the right decision when generating code for different targets.
+ const AArch64Subtarget *Subtarget; + +public: + explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), TM(tm), + TII(static_cast<const AArch64InstrInfo*>(TM.getInstrInfo())), + Subtarget(&TM.getSubtarget<AArch64Subtarget>()) { + } + + virtual const char *getPassName() const { + return "AArch64 Instruction Selection"; + } + + // Include the pieces autogenerated from the target description. +#include "AArch64GenDAGISel.inc" + + template<unsigned MemSize> + bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN || CN->getZExtValue() % MemSize != 0 + || CN->getZExtValue() / MemSize > 0xfff) + return false; + + UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64); + return true; + } + + template<unsigned RegWidth> + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { + return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); + } + + bool SelectFPZeroOperand(SDValue N, SDValue &Dummy); + + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth); + + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); + + bool SelectLogicalImm(SDValue N, SDValue &Imm); + + template<unsigned RegWidth> + bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) { + return SelectTSTBOperand(N, FixedPos, RegWidth); + } + + bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); + + SDNode *TrySelectToMoveImm(SDNode *N); + SDNode *LowerToFPLitPool(SDNode *Node); + SDNode *SelectToLitPool(SDNode *N); + + SDNode* Select(SDNode*); +private: +}; +} + +bool +AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth) { + const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); + if (!CN) return false; + + // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits + // is between 1 and 32 for a destination w-register, or 1 and 64 for an + // x-register. + // + // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we + // want THIS_NODE to be 2^fbits. This is much easier to deal with using + // integers. + bool IsExact; + + // fbits is between 1 and 64 in the worst-case, which means the fmul + // could have 2^64 as an actual operand. Need 65 bits of precision. + APSInt IntVal(65, true); + CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); + + // N.b. isPowerOf2 also checks for > 0. + if (!IsExact || !IntVal.isPowerOf2()) return false; + unsigned FBits = IntVal.logBase2(); + + // Checks above should have guaranteed that we haven't lost information in + // finding FBits, but it must still be in range. + if (FBits == 0 || FBits > RegWidth) return false; + + FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32); + return true; +} + +bool +AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) { + switch (ConstraintCode) { + default: llvm_unreachable("Unrecognised AArch64 memory constraint"); + case 'm': + // FIXME: more freedom is actually permitted for 'm'. We can go + // hunting for a base and an offset if we want. Of course, since + // we don't really know how the operand is going to be used we're + // probably restricted to the load/store pair's simm7 as an offset + // range anyway. 
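+  // (For 64-bit LDP/STP that simm7 is scaled by 8, i.e. byte offsets in the
+  // range -512 .. +504; figures quoted for illustration.)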
+ case 'Q': + OutOps.push_back(Op); + } + + return false; +} + +bool +AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) { + ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N); + if (!Imm || !Imm->getValueAPF().isPosZero()) + return false; + + // Doesn't actually carry any information, but keeps TableGen quiet. + Dummy = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) { + uint32_t Bits; + uint32_t RegWidth = N.getValueType().getSizeInBits(); + + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) return false; + + if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits)) + return false; + + Imm = CurDAG->getTargetConstant(Bits, MVT::i32); + return true; +} + +SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { + SDNode *ResNode; + DebugLoc dl = Node->getDebugLoc(); + EVT DestType = Node->getValueType(0); + unsigned DestWidth = DestType.getSizeInBits(); + + unsigned MOVOpcode; + EVT MOVType; + int UImm16, Shift; + uint32_t LogicalBits; + + uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue(); + if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) { + MOVType = DestType; + MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii; + } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) { + MOVType = DestType; + MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii; + } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) { + // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can + // use a 32-bit instruction: "movn w0, 0xedbc". + MOVType = MVT::i32; + MOVOpcode = AArch64::MOVNwii; + } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) { + MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi; + uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR; + + return CurDAG->getMachineNode(MOVOpcode, dl, DestType, + CurDAG->getRegister(ZR, DestType), + CurDAG->getTargetConstant(LogicalBits, MVT::i32)); + } else { + // Can't handle it in one instruction. There's scope for permitting two (or + // more) instructions, but that'll need more thought. + return NULL; + } + + ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType, + CurDAG->getTargetConstant(UImm16, MVT::i32), + CurDAG->getTargetConstant(Shift, MVT::i32)); + + if (MOVType != DestType) { + ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, + MVT::i64, MVT::i32, MVT::Other, + CurDAG->getTargetConstant(0, MVT::i64), + SDValue(ResNode, 0), + CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32)); + } + + return ResNode; +} + +SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { + DebugLoc DL = Node->getDebugLoc(); + uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue(); + int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue(); + EVT DestType = Node->getValueType(0); + EVT PtrVT = TLI.getPointerTy(); + + // Since we may end up loading a 64-bit constant from a 32-bit entry the + // constant in the pool may have a different type to the eventual node. 
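+  // Illustrative cases for an i64 destination: 0x00000000fffffffe can use a
+  // zero-extending 32-bit load, 0xffffffff80000000 a sign-extending one, and
+  // 0x0000000100000001 needs a full 64-bit pool entry.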
+ ISD::LoadExtType Extension; + EVT MemType; + + assert((DestType == MVT::i64 || DestType == MVT::i32) + && "Only expect integer constants at the moment"); + + if (DestType == MVT::i32) { + Extension = ISD::NON_EXTLOAD; + MemType = MVT::i32; + } else if (UnsignedVal <= UINT32_MAX) { + Extension = ISD::ZEXTLOAD; + MemType = MVT::i32; + } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) { + Extension = ISD::SEXTLOAD; + MemType = MVT::i32; + } else { + Extension = ISD::NON_EXTLOAD; + MemType = MVT::i64; + } + + Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(), + MemType.getSizeInBits()), + UnsignedVal); + SDValue PoolAddr; + unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType()); + PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, + AArch64II::MO_NO_FLAG), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, + AArch64II::MO_LO12), + CurDAG->getConstant(Alignment, MVT::i32)); + + return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(), + PoolAddr, + MachinePointerInfo::getConstantPool(), MemType, + /* isVolatile = */ false, + /* isNonTemporal = */ false, + Alignment).getNode(); +} + +SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) { + DebugLoc DL = Node->getDebugLoc(); + const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue(); + EVT PtrVT = TLI.getPointerTy(); + EVT DestType = Node->getValueType(0); + + unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType()); + SDValue PoolAddr; + + assert(TM.getCodeModel() == CodeModel::Small && + "Only small code model supported"); + PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, + AArch64II::MO_NO_FLAG), + CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, + AArch64II::MO_LO12), + CurDAG->getConstant(Alignment, MVT::i32)); + + return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr, + MachinePointerInfo::getConstantPool(), + /* isVolatile = */ false, + /* isNonTemporal = */ false, + /* isInvariant = */ true, + Alignment).getNode(); +} + +bool +AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth) { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) return false; + + uint64_t Val = CN->getZExtValue(); + + if (!isPowerOf2_64(Val)) return false; + + unsigned TestedBit = Log2_64(Val); + // Checks above should have guaranteed that we haven't lost information in + // finding TestedBit, but it must still be in range. + if (TestedBit >= RegWidth) return false; + + FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64); + return true; +} + +SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { + // Dump information about the Node being selected + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); + + if (Node->isMachineOpcode()) { + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); + return NULL; + } + + switch (Node->getOpcode()) { + case ISD::FrameIndex: { + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); + EVT PtrTy = TLI.getPointerTy(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy); + return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy, + TFI, CurDAG->getTargetConstant(0, PtrTy)); + } + case ISD::ConstantPool: { + // Constant pools are fine, just create a Target entry. 
+ ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node); + const Constant *C = CN->getConstVal(); + SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0)); + + ReplaceUses(SDValue(Node, 0), CP); + return NULL; + } + case ISD::Constant: { + SDNode *ResNode = 0; + if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) { + // XZR and WZR are probably even better than an actual move: most of the + // time they can be folded into another instruction with *no* cost. + + EVT Ty = Node->getValueType(0); + assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type"); + uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR; + ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + Node->getDebugLoc(), + Register, Ty).getNode(); + } + + // Next best option is a move-immediate, see if we can do that. + if (!ResNode) { + ResNode = TrySelectToMoveImm(Node); + } + + if (ResNode) + return ResNode; + + // If even that fails we fall back to a lit-pool entry at the moment. Future + // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions. + ResNode = SelectToLitPool(Node); + assert(ResNode && "We need *some* way to materialise a constant"); + + // We want to continue selection at this point since the litpool access + // generated used generic nodes for simplicity. + ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); + Node = ResNode; + break; + } + case ISD::ConstantFP: { + if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) { + // FMOV will take care of it from TableGen + break; + } + + SDNode *ResNode = LowerToFPLitPool(Node); + ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); + + // We want to continue selection at this point since the litpool access + // generated used generic nodes for simplicity. + Node = ResNode; + break; + } + default: + break; // Let generic code handle it + } + + SDNode *ResNode = SelectCode(Node); + + DEBUG(dbgs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << "\n"); + + return ResNode; +} + +/// This pass converts a legalized DAG into a AArch64-specific DAG, ready for +/// instruction scheduling. +FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new AArch64DAGToDAGISel(TM, OptLevel); +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp new file mode 100644 index 0000000..e9f4497 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -0,0 +1,2975 @@ +//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that AArch64 uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-isel" +#include "AArch64.h" +#include "AArch64ISelLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "AArch64TargetObjectFile.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/CallingConv.h" + +using namespace llvm; + +static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) { + const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>(); + + if (Subtarget->isTargetLinux()) + return new AArch64LinuxTargetObjectFile(); + if (Subtarget->isTargetELF()) + return new TargetLoweringObjectFileELF(); + llvm_unreachable("unknown subtarget type"); +} + + +AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) + : TargetLowering(TM, createTLOF(TM)), + Subtarget(&TM.getSubtarget<AArch64Subtarget>()), + RegInfo(TM.getRegisterInfo()), + Itins(TM.getInstrItineraryData()) { + + // SIMD compares set the entire lane's bits to 1 + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + // Scalar register <-> type mapping + addRegisterClass(MVT::i32, &AArch64::GPR32RegClass); + addRegisterClass(MVT::i64, &AArch64::GPR64RegClass); + addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); + + computeRegisterProperties(); + + // Some atomic operations can be folded into load-acquire or store-release + // instructions on AArch64. It's marginally simpler to let LLVM expand + // everything out to a barrier and then recombine the (few) barriers we can. + setInsertFencesForAtomic(true); + setTargetDAGCombine(ISD::ATOMIC_FENCE); + setTargetDAGCombine(ISD::ATOMIC_STORE); + + // We combine OR nodes for bitfield and NEON BSL operations. + setTargetDAGCombine(ISD::OR); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::SRA); + + // AArch64 does not have i1 loads, or much of anything for i1 really. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + + setStackPointerRegisterToSaveRestore(AArch64::XSP); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + + // We'll lower globals to wrappers for selection. + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + + // A64 instructions have the comparison predicate attached to the user of the + // result, but having a separate comparison is valuable for matching. 
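+  // (Illustrative: an i32 signed-less-than branch is expected to end up as a
+  // "cmp w0, w1; b.lt label" pair once the custom lowerings below have run.)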
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::i64, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::i64, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + setOperationAction(ISD::BRCOND, MVT::Other, Custom); + + setOperationAction(ISD::SETCC, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::i64, Custom); + setOperationAction(ISD::SETCC, MVT::f32, Custom); + setOperationAction(ISD::SETCC, MVT::f64, Custom); + + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); + + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + + // Legal floating-point operations. + setOperationAction(ISD::FABS, MVT::f32, Legal); + setOperationAction(ISD::FABS, MVT::f64, Legal); + + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + + setOperationAction(ISD::FNEG, MVT::f32, Legal); + setOperationAction(ISD::FNEG, MVT::f64, Legal); + + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + + setOperationAction(ISD::FSQRT, MVT::f32, Legal); + setOperationAction(ISD::FSQRT, MVT::f64, Legal); + + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f128, Legal); + + // Illegal floating-point operations. 
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + + setOperationAction(ISD::FEXP, MVT::f32, Expand); + setOperationAction(ISD::FEXP, MVT::f64, Expand); + + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + + setOperationAction(ISD::FLOG, MVT::f32, Expand); + setOperationAction(ISD::FLOG, MVT::f64, Expand); + + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + + setOperationAction(ISD::FPOWI, MVT::f32, Expand); + setOperationAction(ISD::FPOWI, MVT::f64, Expand); + + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + + // Virtually no operation on f128 is legal, but LLVM can't expand them when + // there's a valid register class, so we need custom operations in most cases. + setOperationAction(ISD::FABS, MVT::f128, Expand); + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); + setOperationAction(ISD::FCOS, MVT::f128, Expand); + setOperationAction(ISD::FDIV, MVT::f128, Custom); + setOperationAction(ISD::FMA, MVT::f128, Expand); + setOperationAction(ISD::FMUL, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f128, Expand); + setOperationAction(ISD::FPOW, MVT::f128, Expand); + setOperationAction(ISD::FREM, MVT::f128, Expand); + setOperationAction(ISD::FRINT, MVT::f128, Expand); + setOperationAction(ISD::FSIN, MVT::f128, Expand); + setOperationAction(ISD::FSINCOS, MVT::f128, Expand); + setOperationAction(ISD::FSQRT, MVT::f128, Expand); + setOperationAction(ISD::FSUB, MVT::f128, Custom); + setOperationAction(ISD::FTRUNC, MVT::f128, Expand); + setOperationAction(ISD::SETCC, MVT::f128, Custom); + setOperationAction(ISD::BR_CC, MVT::f128, Custom); + setOperationAction(ISD::SELECT, MVT::f128, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + + // Lowering for many of the conversions is actually specified by the non-f128 + // type. The LowerXXX function will be trivial when f128 isn't involved. 
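+  // (Illustrative: f64 -> i32 remains a single fcvtzs, whereas an f128 source
+  // is expected to go out to a library call such as __fixtfsi.)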
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); + + // This prevents LLVM trying to compress double constants into a floating + // constant-pool entry and trying to load from there. It's of doubtful benefit + // for A64: we'd need LDR followed by FCVT, I believe. + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + + setExceptionPointerRegister(AArch64::X0); + setExceptionSelectorRegister(AArch64::X1); +} + +EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const { + // It's reasonably important that this value matches the "natural" legal + // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself + // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64). + if (!VT.isVector()) return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} + +static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc, + unsigned &strOpc) { + switch (Size) { + default: llvm_unreachable("unsupported size for atomic binary op!"); + case 1: + ldrOpc = AArch64::LDXR_byte; + strOpc = AArch64::STXR_byte; + break; + case 2: + ldrOpc = AArch64::LDXR_hword; + strOpc = AArch64::STXR_hword; + break; + case 4: + ldrOpc = AArch64::LDXR_word; + strOpc = AArch64::STXR_word; + break; + case 8: + ldrOpc = AArch64::LDXR_dword; + strOpc = AArch64::STXR_dword; + break; + } +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size, + unsigned BinOpcode) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
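+  // When BinOpcode is 0 no arithmetic is emitted below: scratch aliases incr,
+  // so the STXR simply writes the incoming value back, which is the swap.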
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + const TargetRegisterClass *TRC + = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldxr dest, ptr + // <binop> scratch, dest, incr + // stxr stxr_status, scratch, ptr + // cbnz stxr_status, loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + if (BinOpcode) { + // All arithmetic operations we'll be creating are designed to take an extra + // shift or extend operand, which we can conveniently set to zero. + + // Operand order needs to go the other way for NAND. + if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl) + BuildMI(BB, dl, TII->get(BinOpcode), scratch) + .addReg(incr).addReg(dest).addImm(0); + else + BuildMI(BB, dl, TII->get(BinOpcode), scratch) + .addReg(dest).addReg(incr).addImm(0); + } + + // From the stxr, the register is GPR32; from the cmp it's GPR32wsp + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::CBNZw)) + .addReg(stxr_status).addMBB(loopMBB); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. 
+ + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + unsigned CmpOp, + A64CC::CondCodes Cond) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + unsigned oldval = dest; + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const TargetRegisterClass *TRC, *TRCsp; + if (Size == 8) { + TRC = &AArch64::GPR64RegClass; + TRCsp = &AArch64::GPR64xspRegClass; + } else { + TRC = &AArch64::GPR32RegClass; + TRCsp = &AArch64::GPR32wspRegClass; + } + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + unsigned scratch = MRI.createVirtualRegister(TRC); + MRI.constrainRegClass(scratch, TRCsp); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldxr dest, ptr + // cmp incr, dest (, sign extend if necessary) + // csel scratch, dest, incr, cond + // stxr stxr_status, scratch, ptr + // cbnz stxr_status, loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + + // Build compare and cmov instructions. + MRI.constrainRegClass(incr, TRCsp); + BuildMI(BB, dl, TII->get(CmpOp)) + .addReg(incr).addReg(oldval).addImm(0); + + BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc), + scratch) + .addReg(oldval).addReg(incr).addImm(Cond); + + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status) + .addReg(scratch).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::CBNZw)) + .addReg(stxr_status).addMBB(loopMBB); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size) const { + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned oldval = MI->getOperand(2).getReg(); + unsigned newval = MI->getOperand(3).getReg(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const TargetRegisterClass *TRCsp; + TRCsp = Size == 8 ? 
&AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass; + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineFunction *MF = BB->getParent(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; // insert the new blocks after the current block + + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB->addSuccessor(loop1MBB); + + // loop1MBB: + // ldxr dest, [ptr] + // cmp dest, oldval + // b.ne exitMBB + BB = loop1MBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + + unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl; + MRI.constrainRegClass(dest, TRCsp); + BuildMI(BB, dl, TII->get(CmpOp)) + .addReg(dest).addReg(oldval).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(exitMBB); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(exitMBB); + + // loop2MBB: + // strex stxr_status, newval, [ptr] + // cbnz stxr_status, loop1MBB + BB = loop2MBB; + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::CBNZw)) + .addReg(stxr_status).addMBB(loop1MBB); + BB->addSuccessor(loop1MBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, + MachineBasicBlock *MBB) const { + // We materialise the F128CSEL pseudo-instruction using conditional branches + // and loads, giving an instruciton sequence like: + // str q0, [sp] + // b.ne IfTrue + // b Finish + // IfTrue: + // str q1, [sp] + // Finish: + // ldr q0, [sp] + // + // Using virtual registers would probably not be beneficial since COPY + // instructions are expensive for f128 (there's no actual instruction to + // implement them). + // + // An alternative would be to do an integer-CSEL on some address. E.g.: + // mov x0, sp + // add x1, sp, #16 + // str q0, [x0] + // str q1, [x1] + // csel x0, x0, x1, ne + // ldr q0, [x0] + // + // It's unclear which approach is actually optimal. 
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineFunction *MF = MBB->getParent(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction::iterator It = MBB; + ++It; + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned IfTrueReg = MI->getOperand(1).getReg(); + unsigned IfFalseReg = MI->getOperand(2).getReg(); + unsigned CondCode = MI->getOperand(3).getImm(); + bool NZCVKilled = MI->getOperand(4).isKill(); + + MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, TrueBB); + MF->insert(It, EndBB); + + // Transfer rest of current basic-block to EndBB + EndBB->splice(EndBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + EndBB->transferSuccessorsAndUpdatePHIs(MBB); + + // We need somewhere to store the f128 value needed. + int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16); + + // [... start of incoming MBB ...] + // str qIFFALSE, [sp] + // b.cc IfTrue + // b Done + BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR)) + .addReg(IfFalseReg) + .addFrameIndex(ScratchFI) + .addImm(0); + BuildMI(MBB, DL, TII->get(AArch64::Bcc)) + .addImm(CondCode) + .addMBB(TrueBB); + BuildMI(MBB, DL, TII->get(AArch64::Bimm)) + .addMBB(EndBB); + MBB->addSuccessor(TrueBB); + MBB->addSuccessor(EndBB); + + // IfTrue: + // str qIFTRUE, [sp] + BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR)) + .addReg(IfTrueReg) + .addFrameIndex(ScratchFI) + .addImm(0); + + // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the + // blocks. + TrueBB->addSuccessor(EndBB); + + // Done: + // ldr qDEST, [sp] + // [... rest of incoming MBB ...] + if (!NZCVKilled) + EndBB->addLiveIn(AArch64::NZCV); + MachineInstr *StartOfEnd = EndBB->begin(); + BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg) + .addFrameIndex(ScratchFI) + .addImm(0); + + MI->eraseFromParent(); + return EndBB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { + switch (MI->getOpcode()) { + default: llvm_unreachable("Unhandled instruction with custom inserter"); + case AArch64::F128CSEL: + return EmitF128CSEL(MI, MBB); + case AArch64::ATOMIC_LOAD_ADD_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl); + + case AArch64::ATOMIC_LOAD_SUB_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl); + + case AArch64::ATOMIC_LOAD_AND_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl); + + case AArch64::ATOMIC_LOAD_OR_I8: + return emitAtomicBinary(MI, MBB, 
1, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl); + + case AArch64::ATOMIC_LOAD_XOR_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl); + + case AArch64::ATOMIC_LOAD_NAND_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl); + + case AArch64::ATOMIC_LOAD_MIN_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT); + + case AArch64::ATOMIC_LOAD_MAX_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT); + + case AArch64::ATOMIC_LOAD_UMIN_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI); + + case AArch64::ATOMIC_LOAD_UMAX_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO); + + case AArch64::ATOMIC_SWAP_I8: + return emitAtomicBinary(MI, MBB, 1, 0); + case AArch64::ATOMIC_SWAP_I16: + return emitAtomicBinary(MI, MBB, 2, 0); + case AArch64::ATOMIC_SWAP_I32: + return emitAtomicBinary(MI, MBB, 4, 0); + case AArch64::ATOMIC_SWAP_I64: + return emitAtomicBinary(MI, MBB, 8, 0); + + case AArch64::ATOMIC_CMP_SWAP_I8: + return emitAtomicCmpSwap(MI, MBB, 1); + case AArch64::ATOMIC_CMP_SWAP_I16: + return emitAtomicCmpSwap(MI, MBB, 2); + case AArch64::ATOMIC_CMP_SWAP_I32: + return emitAtomicCmpSwap(MI, MBB, 4); + case AArch64::ATOMIC_CMP_SWAP_I64: + return emitAtomicCmpSwap(MI, MBB, 8); + } +} + + +const 
char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC"; + case AArch64ISD::Call: return "AArch64ISD::Call"; + case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV"; + case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad"; + case AArch64ISD::BFI: return "AArch64ISD::BFI"; + case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; + case AArch64ISD::Ret: return "AArch64ISD::Ret"; + case AArch64ISD::SBFX: return "AArch64ISD::SBFX"; + case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC"; + case AArch64ISD::SETCC: return "AArch64ISD::SETCC"; + case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; + case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; + case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL"; + case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; + + default: return NULL; + } +} + +static const uint16_t AArch64FPRArgRegs[] = { + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, + AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7 +}; +static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs); + +static const uint16_t AArch64ArgRegs[] = { + AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, + AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7 +}; +static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs); + +static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + // Mark all remaining general purpose registers as allocated. We don't + // backtrack: if (for example) an i128 gets put on the stack, no subsequent + // i64 will go in registers (C.11). + for (unsigned i = 0; i < NumArgRegs; ++i) + State.AllocateReg(AArch64ArgRegs[i]); + + return false; +} + +#include "AArch64GenCallingConv.inc" + +CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { + + switch(CC) { + default: llvm_unreachable("Unsupported calling convention"); + case CallingConv::Fast: + case CallingConv::C: + return CC_A64_APCS; + } +} + +void +AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc DL, SDValue &Chain) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + + SmallVector<SDValue, 8> MemOps; + + unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs, + NumArgRegs); + unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs, + NumFPRArgRegs); + + unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR); + int GPRIdx = 0; + if (GPRSaveSize != 0) { + GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); + + SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); + + for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); + SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 8), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(8, getPointerTy())); + } + } + + unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); + int FPRIdx = 0; + if (FPRSaveSize != 0) { + FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); + + SDValue FIN = DAG.getFrameIndex(FPRIdx, 
getPointerTy()); + + for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], + &AArch64::FPR128RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); + SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 16), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(16, getPointerTy())); + } + } + + int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true); + + FuncInfo->setVariadicStackIdx(StackIdx); + FuncInfo->setVariadicGPRIdx(GPRIdx); + FuncInfo->setVariadicGPRSize(GPRSaveSize); + FuncInfo->setVariadicFPRIdx(FPRIdx); + FuncInfo->setVariadicFPRSize(FPRSaveSize); + + if (!MemOps.empty()) { + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], + MemOps.size()); + } +} + + +SDValue +AArch64TargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); + + SmallVector<SDValue, 16> ArgValues; + + SDValue ArgValue; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + + if (Flags.isByVal()) { + // Byval is used for small structs and HFAs in the PCS, but the system + // should work in a non-compliant manner for larger structs. 
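+      //
+      // As a concrete (illustrative) example: a 20-byte struct passed byval
+      // arrives with getByValSize() == 20, so NumRegs below is 3 and the rest
+      // of codegen sees a 24-byte fixed stack object at the PCS-assigned
+      // offset; the value we hand back for the argument is just the address
+      // of that object.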
+ EVT PtrTy = getPointerTy(); + int Size = Flags.getByValSize(); + unsigned NumRegs = (Size + 7) / 8; + + unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs, + VA.getLocMemOffset(), + false); + SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); + InVals.push_back(FrameIdxN); + + continue; + } else if (VA.isRegLoc()) { + MVT RegVT = VA.getLocVT(); + const TargetRegisterClass *RC = getRegClassFor(RegVT); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + + ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + } else { // VA.isRegLoc() + assert(VA.isMemLoc()); + + int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); + + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); + + + } + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: { + unsigned DestSize = VA.getValVT().getSizeInBits(); + unsigned DestSubReg; + + switch (DestSize) { + case 8: DestSubReg = AArch64::sub_8; break; + case 16: DestSubReg = AArch64::sub_16; break; + case 32: DestSubReg = AArch64::sub_32; break; + case 64: DestSubReg = AArch64::sub_64; break; + default: llvm_unreachable("Unexpected argument promotion"); + } + + ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + VA.getValVT(), ArgValue, + DAG.getTargetConstant(DestSubReg, MVT::i32)), + 0); + break; + } + } + + InVals.push_back(ArgValue); + } + + if (isVarArg) + SaveVarArgRegisters(CCInfo, DAG, dl, Chain); + + unsigned StackArgSize = CCInfo.getNextStackOffset(); + if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { + // This is a non-standard ABI so by fiat I say we're allowed to make full + // use of the stack area to be popped, which must be aligned to 16 bytes in + // any case: + StackArgSize = RoundUpToAlignment(StackArgSize, 16); + + // If we're expected to restore the stack (e.g. fastcc) then we'll be adding + // a multiple of 16. + FuncInfo->setArgumentStackToRestore(StackArgSize); + + // This realignment carries over to the available bytes below. Our own + // callers will guarantee the space is free by giving an aligned value to + // CALLSEQ_START. + } + // Even if we're not expected to free up the space, it's useful to know how + // much is there while considering tail calls (because we can reuse it). + FuncInfo->setBytesInStackArgArea(StackArgSize); + + return Chain; +} + +SDValue +AArch64TargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const { + // CCValAssign - represent the assignment of the return value to a location. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slots. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + // Analyze outgoing return values. 
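+  // For example (PCS rules, sketched rather than exhaustive): an i32 result
+  // is assigned to W0, a double to D0 and an f128 to Q0, while anything large
+  // enough to need memory has already been rewritten as an sret argument and
+  // so never appears in RVLocs at all.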
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv)); + + SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + // PCS: "If the type, T, of the result of a function is such that + // void func(T arg) would require that arg be passed as a value in a + // register (or set of registers) according to the rules in 5.4, then the + // result is returned in the same registers as would be used for such an + // argument. + // + // Otherwise, the caller shall reserve a block of memory of sufficient + // size and alignment to hold the result. The address of the memory block + // shall be passed as an additional argument to the function in x8." + // + // This is implemented in two places. The register-return values are dealt + // with here, more complex returns are passed as an sret parameter, which + // means we don't have to worry about it during actual return. + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Only register-returns should be created by PCS"); + + + SDValue Arg = OutVals[i]; + + // There's no convenient note in the ABI about this as there is for normal + // arguments, but it says return values are passed in the same registers as + // an argument would be. I believe that includes the comments about + // unspecified higher bits, putting the burden of widening on the *caller* + // for return values. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: + // Floating-point values should only be extended when they're going into + // memory, which can't happen here so an integer extend is acceptable. + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); + break; + } + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. 
+ if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, + &RetOps[0], RetOps.size()); +} + +SDValue +AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; + SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &IsTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + + MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet(); + bool IsSibCall = false; + + if (IsTailCall) { + IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); + + // A sibling call is one where we're under the usual C ABI and not planning + // to change that but can still do a tail call: + if (!TailCallOpt && IsTailCall) + IsSibCall = true; + } + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); + + // On AArch64 (and all other architectures I'm aware of) the most this has to + // do is adjust the stack pointer. + unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16); + if (IsSibCall) { + // Since we're not changing the ABI to make this a tail call, the memory + // operands are already available in the caller's incoming argument space. + NumBytes = 0; + } + + // FPDiff is the byte offset of the call's argument area from the callee's. + // Stores to callee stack arguments will be placed in FixedStackSlots offset + // by this amount for a tail call. In a sibling call it must be 0 because the + // caller will deallocate the entire stack and the callee still expects its + // arguments to begin at SP+0. Completely unused for non-tail calls. + int FPDiff = 0; + + if (IsTailCall && !IsSibCall) { + unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); + + // FPDiff will be negative if this tail call requires more space than we + // would automatically have in our incoming argument space. Positive if we + // can actually shrink the stack. + FPDiff = NumReusableBytes - NumBytes; + + // The stack pointer must be 16-byte aligned at all times it's used for a + // memory operation, which in practice means at *all* times and in + // particular across call boundaries. Therefore our own arguments started at + // a 16-byte aligned SP and the delta applied for the tail call should + // satisfy the same constraint. 
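+    //
+    // A worked example: if our own frame received 32 bytes of stack-based
+    // arguments (NumReusableBytes == 32) and this tail call only needs 16
+    // (NumBytes == 16), FPDiff comes out as +16, positive because (as noted
+    // above) the area we already own is more than big enough. Both quantities
+    // are multiples of 16, which is what the assertion below relies on.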
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); + } + + if (!IsSibCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP, + getPointerTy()); + + SmallVector<SDValue, 8> MemOpChains; + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + SDValue Arg = OutVals[i]; + + // Callee does the actual widening, so all extensions just use an implicit + // definition of the rest of the Loc. Aesthetically, this would be nicer as + // an ANY_EXTEND, but that isn't valid for floating-point types and this + // alternative works on integer types too. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: { + unsigned SrcSize = VA.getValVT().getSizeInBits(); + unsigned SrcSubReg; + + switch (SrcSize) { + case 8: SrcSubReg = AArch64::sub_8; break; + case 16: SrcSubReg = AArch64::sub_16; break; + case 32: SrcSubReg = AArch64::sub_32; break; + case 64: SrcSubReg = AArch64::sub_64; break; + default: llvm_unreachable("Unexpected argument promotion"); + } + + Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, + VA.getLocVT(), + DAG.getUNDEF(VA.getLocVT()), + Arg, + DAG.getTargetConstant(SrcSubReg, MVT::i32)), + 0); + + break; + } + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + // A normal register (sub-) argument. For now we just note it down because + // we want to copy things into registers as late as possible to avoid + // register-pressure (and possibly worse). + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + continue; + } + + assert(VA.isMemLoc() && "unexpected argument location"); + + SDValue DstAddr; + MachinePointerInfo DstInfo; + if (IsTailCall) { + uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() : + VA.getLocVT().getSizeInBits(); + OpSize = (OpSize + 7) / 8; + int32_t Offset = VA.getLocMemOffset() + FPDiff; + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); + + DstAddr = DAG.getFrameIndex(FI, getPointerTy()); + DstInfo = MachinePointerInfo::getFixedStack(FI); + + // Make sure any stack arguments overlapping with where we're storing are + // loaded before this eventual operation. Otherwise they'll be clobbered. + Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); + } else { + SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()); + + DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset()); + } + + if (Flags.isByVal()) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64); + SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode, + Flags.getByValAlign(), + /*isVolatile = */ false, + /*alwaysInline = */ false, + DstInfo, MachinePointerInfo(0)); + MemOpChains.push_back(Cpy); + } else { + // Normal stack argument, put it where it's needed. + SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo, + false, false, 0); + MemOpChains.push_back(Store); + } + } + + // The loads and stores generated above shouldn't clash with each + // other. Combining them with this TokenFactor notes that fact for the rest of + // the backend. 
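+  // For example, a call with two stack-based arguments and one byval copy
+  // ends up with something like TokenFactor(Store0, Store1, Memcpy) on the
+  // chain, rather than an arbitrary serial ordering of the three operations.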
+ if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Most of the rest of the instructions need to be glued together; we don't + // want assignments to actual registers used by a call to be rearranged by a + // well-meaning scheduler. + SDValue InFlag; + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // The linker is responsible for inserting veneers when necessary to put a + // function call destination in range, so we don't need to bother with a + // wrapper here. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + const char *Sym = S->getSymbol(); + Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); + } + + // We don't usually want to end the call-sequence here because we would tidy + // the frame up *after* the call, however in the ABI-changing tail-call case + // we've carefully laid out the parameters so that when sp is reset they'll be + // in the correct location. + if (IsTailCall && !IsSibCall) { + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); + } + + // We produce the following DAG scheme for the actual call instruction: + // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag? + // + // Most arguments aren't going to be used and just keep the values live as + // far as LLVM is concerned. It's expected to be selected as simply "bl + // callee" (for a direct, non-tail call). + std::vector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + if (IsTailCall) { + // Each tail call may have to adjust the stack by a different amount, so + // this information must travel along with the operation for eventual + // consumption by emitEpilogue. + Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32)); + } + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + + // Add a register mask operand representing the call-preserved registers. This + // is used later in codegen to constrain register-allocation. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // If we needed glue, put it in as the last argument. + if (InFlag.getNode()) + Ops.push_back(InFlag); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + if (IsTailCall) { + return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + } + + Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Now we can reclaim the stack, just as well do it before working out where + // our return value is. + if (!IsSibCall) { + uint64_t CalleePopBytes + = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? 
NumBytes : 0; + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(CalleePopBytes, true), + InFlag); + InFlag = Chain.getValue(1); + } + + return LowerCallResult(Chain, InFlag, CallConv, + IsVarArg, Ins, dl, DAG, InVals); +} + +SDValue +AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv)); + + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + // Return values that are too big to fit into registers should use an sret + // pointer, so this can be a lot simpler than the main argument code. + assert(VA.isRegLoc() && "Memory locations not expected for call return"); + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); + break; + case CCValAssign::ZExt: + case CCValAssign::SExt: + case CCValAssign::AExt: + // Floating-point arguments only get extended/truncated if they're going + // in memory, so using the integer operation is acceptable here. + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + break; + } + + InVals.push_back(Val); + } + + return Chain; +} + +bool +AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool IsVarArg, + bool IsCalleeStructRet, + bool IsCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + + // For CallingConv::C this function knows whether the ABI needs + // changing. That's not true for other conventions so they will have to opt in + // manually. + if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) + return false; + + const MachineFunction &MF = DAG.getMachineFunction(); + const Function *CallerF = MF.getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + + // Byval parameters hand the function a pointer directly into the stack area + // we want to reuse during a tail call. Working around this *is* possible (see + // X86) but less efficient and uglier in LowerCall. + for (Function::const_arg_iterator i = CallerF->arg_begin(), + e = CallerF->arg_end(); i != e; ++i) + if (i->hasByValAttr()) + return false; + + if (getTargetMachine().Options.GuaranteedTailCallOpt) { + if (IsTailCallConvention(CalleeCC) && CCMatch) + return true; + return false; + } + + // Now we search for cases where we can use a tail call without changing the + // ABI. Sibcall is used in some places (particularly gcc) to refer to this + // concept. + + // I want anyone implementing a new calling convention to think long and hard + // about this assert. 
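+  // (As an illustrative common case: with the plain C convention on both
+  // sides,
+  //     int callee(int);
+  //     int caller(int x) { return callee(x + 1); }
+  // passes every check in this function and can be emitted as a bare
+  // "b callee", whereas a caller that takes a byval struct was already
+  // rejected above.)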
+ assert((!IsVarArg || CalleeCC == CallingConv::C) + && "Unexpected variadic calling convention"); + + if (IsVarArg && !Outs.empty()) { + // At least two cases here: if caller is fastcc then we can't have any + // memory arguments (we'd be expected to clean up the stack afterwards). If + // caller is C then we could potentially use its argument area. + + // FIXME: for now we take the most conservative of these in both cases: + // disallow all variadic memory operands. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) + if (!ArgLocs[i].isRegLoc()) + return false; + } + + // If the calling conventions do not match, then we'd better make sure the + // results are returned in the same way as what the caller expects. + if (!CCMatch) { + SmallVector<CCValAssign, 16> RVLocs1; + CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), + getTargetMachine(), RVLocs1, *DAG.getContext()); + CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC)); + + SmallVector<CCValAssign, 16> RVLocs2; + CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), + getTargetMachine(), RVLocs2, *DAG.getContext()); + CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC)); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { + if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) + return false; + if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) + return false; + if (RVLocs1[i].isRegLoc()) { + if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) + return false; + } else { + if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) + return false; + } + } + } + + // Nothing more to check if the callee is taking no arguments + if (Outs.empty()) + return true; + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + + const AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + + // If the stack arguments for this call would fit into our own save area then + // the call can be made tail. + return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea(); +} + +bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, + bool TailCallOpt) const { + return CallCC == CallingConv::Fast && TailCallOpt; +} + +bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { + return CallCC == CallingConv::Fast; +} + +SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, + SelectionDAG &DAG, + MachineFrameInfo *MFI, + int ClobberedFI) const { + SmallVector<SDValue, 8> ArgChains; + int64_t FirstByte = MFI->getObjectOffset(ClobberedFI); + int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. 
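+  //
+  // The overlap test below is inclusive at both ends: with a clobbered slot
+  // covering bytes [0, 7], an incoming argument load at [4, 11] gets chained
+  // in (they share bytes 4 to 7), while one at [8, 15] is left alone.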
+ ArgChains.push_back(Chain); + + // Add a chain value for each stack argument corresponding + for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), + UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) + if (FI->getIndex() < 0) { + int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex()); + int64_t InLastByte = InFirstByte; + InLastByte += MFI->getObjectSize(FI->getIndex()) - 1; + + if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || + (FirstByte <= InFirstByte && InFirstByte <= LastByte)) + ArgChains.push_back(SDValue(L, 1)); + } + + // Build a tokenfactor for all the chains. + return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + &ArgChains[0], ArgChains.size()); +} + +static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) { + switch (CC) { + case ISD::SETEQ: return A64CC::EQ; + case ISD::SETGT: return A64CC::GT; + case ISD::SETGE: return A64CC::GE; + case ISD::SETLT: return A64CC::LT; + case ISD::SETLE: return A64CC::LE; + case ISD::SETNE: return A64CC::NE; + case ISD::SETUGT: return A64CC::HI; + case ISD::SETUGE: return A64CC::HS; + case ISD::SETULT: return A64CC::LO; + case ISD::SETULE: return A64CC::LS; + default: llvm_unreachable("Unexpected condition code"); + } +} + +bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const { + // icmp is implemented using adds/subs immediate, which take an unsigned + // 12-bit immediate, optionally shifted left by 12 bits. + + // Symmetric by using adds/subs + if (Val < 0) + Val = -Val; + + return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0; +} + +SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDValue &A64cc, + SelectionDAG &DAG, DebugLoc &dl) const { + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { + int64_t C = 0; + EVT VT = RHSC->getValueType(0); + bool knownInvalid = false; + + // I'm not convinced the rest of LLVM handles these edge cases properly, but + // we can at least get it right. + if (isSignedIntSetCC(CC)) { + C = RHSC->getSExtValue(); + } else if (RHSC->getZExtValue() > INT64_MAX) { + // A 64-bit constant not representable by a signed 64-bit integer is far + // too big to fit into a SUBS immediate anyway. + knownInvalid = true; + } else { + C = RHSC->getZExtValue(); + } + + if (!knownInvalid && !isLegalICmpImmediate(C)) { + // Constant does not fit, try adjusting it by one? + switch (CC) { + default: break; + case ISD::SETLT: + case ISD::SETGE: + if (isLegalICmpImmediate(C-1)) { + CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; + RHS = DAG.getConstant(C-1, VT); + } + break; + case ISD::SETULT: + case ISD::SETUGE: + if (isLegalICmpImmediate(C-1)) { + CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; + RHS = DAG.getConstant(C-1, VT); + } + break; + case ISD::SETLE: + case ISD::SETGT: + if (isLegalICmpImmediate(C+1)) { + CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; + RHS = DAG.getConstant(C+1, VT); + } + break; + case ISD::SETULE: + case ISD::SETUGT: + if (isLegalICmpImmediate(C+1)) { + CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; + RHS = DAG.getConstant(C+1, VT); + } + break; + } + } + } + + A64CC::CondCodes CondCode = IntCCToA64CC(CC); + A64cc = DAG.getConstant(CondCode, MVT::i32); + return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); +} + +static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC, + A64CC::CondCodes &Alternative) { + A64CC::CondCodes CondCode = A64CC::Invalid; + Alternative = A64CC::Invalid; + + switch (CC) { + default: llvm_unreachable("Unknown FP condition!"); + case ISD::SETEQ: + case ISD::SETOEQ: CondCode = A64CC::EQ; break; + case ISD::SETGT: + case ISD::SETOGT: CondCode = A64CC::GT; break; + case ISD::SETGE: + case ISD::SETOGE: CondCode = A64CC::GE; break; + case ISD::SETOLT: CondCode = A64CC::MI; break; + case ISD::SETOLE: CondCode = A64CC::LS; break; + case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break; + case ISD::SETO: CondCode = A64CC::VC; break; + case ISD::SETUO: CondCode = A64CC::VS; break; + case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break; + case ISD::SETUGT: CondCode = A64CC::HI; break; + case ISD::SETUGE: CondCode = A64CC::PL; break; + case ISD::SETLT: + case ISD::SETULT: CondCode = A64CC::LT; break; + case ISD::SETLE: + case ISD::SETULE: CondCode = A64CC::LE; break; + case ISD::SETNE: + case ISD::SETUNE: CondCode = A64CC::NE; break; + } + return CondCode; +} + +SDValue +AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT PtrVT = getPointerTy(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + + assert(getTargetMachine().getCodeModel() == CodeModel::Small + && "Only small code model supported at the moment"); + + // The most efficient code is PC-relative anyway for the small memory model, + // so we don't need to worry about relocation model. + return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_LO12), + DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); +} + + +// (BRCOND chain, val, dest) +SDValue +AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Chain = Op.getOperand(0); + SDValue TheBit = Op.getOperand(1); + SDValue DestBB = Op.getOperand(2); + + // AArch64 BooleanContents is the default UndefinedBooleanContent, which means + // that as the consumer we are responsible for ignoring rubbish in higher + // bits. + TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, + DAG.getConstant(1, MVT::i32)); + + SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, + DAG.getConstant(0, TheBit.getValueType()), + DAG.getCondCode(ISD::SETNE)); + + return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain, + A64CMP, DAG.getConstant(A64CC::NE, MVT::i32), + DestBB); +} + +// (BR_CC chain, condcode, lhs, rhs, dest) +SDValue +AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue DestBB = Op.getOperand(4); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons are lowered to runtime calls by a routine which sets + // LHS, RHS and CC appropriately for the rest of this function to continue. 
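+    //
+    // For example (assuming the usual compiler-rt/libgcc naming), an ordered
+    // less-than of two f128 values becomes a call to __lttf2; afterwards LHS
+    // is the libcall's i32 result, RHS is 0 and CC is a plain integer SETLT,
+    // so the rest of this function sees nothing unusual.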
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (RHS.getNode() == 0) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. + SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + Chain, CmpOp, A64cc, DestBB); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. + A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + Chain, SetCC, A64cc, DestBB); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + A64BR_CC, SetCC, A64cc, DestBB); + + } + + return A64BR_CC; +} + +SDValue +AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const { + ArgListTy Args; + ArgListEntry Entry; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + EVT ArgVT = Op.getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy()); + + Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext()); + + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position. + SDValue TCChain = InChain; + bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain); + if (isTailCall) + InChain = TCChain; + + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, false, false, false, false, + 0, getLibcallCallingConv(Call), isTailCall, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, Op->getDebugLoc()); + std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); + + if (!CallInfo.second.getNode()) + // It's a tailcall, return the chain (which is the DAG root). 
+ return DAG.getRoot(); + + return CallInfo.first; +} + +SDValue +AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + if (Op.getOperand(0).getValueType() != MVT::f128) { + // It's legal except when f128 is involved + return Op; + } + + RTLIB::Libcall LC; + LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, Op.getDebugLoc()); +} + +SDValue +AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + +SDValue +AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + bool IsSigned) const { + if (Op.getOperand(0).getValueType() != MVT::f128) { + // It's legal except when f128 is involved + return Op; + } + + RTLIB::Libcall LC; + if (IsSigned) + LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); + else + LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + +SDValue +AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) const { + // TableGen doesn't have easy access to the CodeModel or RelocationModel, so + // we make that distinction here. + + // We support the small memory model for now. + assert(getTargetMachine().getCodeModel() == CodeModel::Small); + + EVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); + const GlobalValue *GV = GN->getGlobal(); + unsigned Alignment = GV->getAlignment(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) { + // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate + // to zero when they remain undefined. In PIC mode the GOT can take care of + // this, but in absolute mode we use a constant pool load. + SDValue PoolAddr; + PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, + DAG.getTargetConstantPool(GV, PtrVT, 0, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetConstantPool(GV, PtrVT, 0, 0, + AArch64II::MO_LO12), + DAG.getConstant(8, MVT::i32)); + SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr, + MachinePointerInfo::getConstantPool(), + /*isVolatile=*/ false, + /*isNonTemporal=*/ true, + /*isInvariant=*/ true, 8); + if (GN->getOffset() != 0) + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, + DAG.getConstant(GN->getOffset(), PtrVT)); + + return GlobalAddr; + } + + if (Alignment == 0) { + const PointerType *GVPtrTy = cast<PointerType>(GV->getType()); + if (GVPtrTy->getElementType()->isSized()) { + Alignment + = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType()); + } else { + // Be conservative if we can't guess, not that it really matters: + // functions and labels aren't valid for loads, and the methods used to + // actually calculate an address work with any alignment. 
+ Alignment = 1; + } + } + + unsigned char HiFixup, LoFixup; + bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM); + + if (UseGOT) { + HiFixup = AArch64II::MO_GOT; + LoFixup = AArch64II::MO_GOT_LO12; + Alignment = 8; + } else { + HiFixup = AArch64II::MO_NO_FLAG; + LoFixup = AArch64II::MO_LO12; + } + + // AArch64's small model demands the following sequence: + // ADRP x0, somewhere + // ADD x0, x0, #:lo12:somewhere ; (or LDR directly). + SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + HiFixup), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + LoFixup), + DAG.getConstant(Alignment, MVT::i32)); + + if (UseGOT) { + GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(), + GlobalRef); + } + + if (GN->getOffset() != 0) + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef, + DAG.getConstant(GN->getOffset(), PtrVT)); + + return GlobalRef; +} + +SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, + SDValue DescAddr, + DebugLoc DL, + SelectionDAG &DAG) const { + EVT PtrVT = getPointerTy(); + + // The function we need to call is simply the first entry in the GOT for this + // descriptor, load it in preparation. + SDValue Func, Chain; + Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), + DescAddr); + + // The function takes only one argument: the address of the descriptor itself + // in X0. + SDValue Glue; + Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); + Glue = Chain.getValue(1); + + // Finally, there's a special calling-convention which means that the lookup + // must preserve all registers (except X0, obviously). + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const AArch64RegisterInfo *A64RI + = static_cast<const AArch64RegisterInfo *>(TRI); + const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask(); + + // We're now ready to populate the argument list, as with a normal call: + std::vector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Func); + Ops.push_back(SymAddr); + Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); + Ops.push_back(DAG.getRegisterMask(Mask)); + Ops.push_back(Glue); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0], + Ops.size()); + Glue = Chain.getValue(1); + + // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it + // back to the generic handling code. 
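+  //
+  // Put together, the code this builds up towards looks roughly like (using
+  // the usual GNU assembler spellings, with "var" standing in for whichever
+  // symbol was asked about):
+  //     adrp  x0, :tlsdesc:var
+  //     ldr   x1, [x0, #:tlsdesc_lo12:var]
+  //     add   x0, x0, #:tlsdesc_lo12:var
+  //     blr   x1
+  //     mrs   x1, tpidr_el0
+  //     add   x0, x1, x0            ; address of var
+  // with the final mrs/add pair being the "generic handling" referred to
+  // above rather than part of this function.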
+ return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); +} + +SDValue +AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetELF() && + "TLS not implemented for non-ELF targets"); + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + + TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + + SDValue TPOff; + EVT PtrVT = getPointerTy(); + DebugLoc DL = Op.getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + + SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); + + if (Model == TLSModel::InitialExec) { + TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_GOTTPREL), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_GOTTPREL_LO12), + DAG.getConstant(8, MVT::i32)); + TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), + TPOff); + } else if (Model == TLSModel::LocalExec) { + SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_TPREL_G1); + SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_TPREL_G0_NC); + + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, + TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + } else if (Model == TLSModel::GeneralDynamic) { + // Accesses used in this sequence go via the TLS descriptor which lives in + // the GOT. Prepare an address we can use to handle this. + SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_TLSDESC); + SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_TLSDESC_LO12); + SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + HiDesc, LoDesc, + DAG.getConstant(8, MVT::i32)); + SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0); + + TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); + } else if (Model == TLSModel::LocalDynamic) { + // Local-dynamic accesses proceed in two phases. A general-dynamic TLS + // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate + // the beginning of the module's TLS region, followed by a DTPREL offset + // calculation. + + // These accesses will need deduplicating if there's more than one. 
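+    //
+    // Roughly: one descriptor call against _TLS_MODULE_BASE_ yields the base
+    // of this module's TLS block, and each local-dynamic variable then lives
+    // at a link-time-constant DTPREL offset from that base, materialised
+    // below with a MOVZ/MOVK pair (the MO_DTPREL_G1 / MO_DTPREL_G0_NC
+    // operands). The counter bumped below records that such an access exists
+    // so the duplicate descriptor calls can be cleaned up afterwards.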
+ AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction() + .getInfo<AArch64MachineFunctionInfo>(); + MFI->incNumLocalDynamicTLSAccesses(); + + + // Get the location of _TLS_MODULE_BASE_: + SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, + AArch64II::MO_TLSDESC); + SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, + AArch64II::MO_TLSDESC_LO12); + SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + HiDesc, LoDesc, + DAG.getConstant(8, MVT::i32)); + SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT); + + ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); + + // Get the variable's offset from _TLS_MODULE_BASE_ + SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_DTPREL_G1); + SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_DTPREL_G0_NC); + + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, + TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + } else + llvm_unreachable("Unsupported TLS access model"); + + + return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); +} + +SDValue +AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, + bool IsSigned) const { + if (Op.getValueType() != MVT::f128) { + // Legal for everything except f128. + return Op; + } + + RTLIB::Libcall LC; + if (IsSigned) + LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + else + LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + + +SDValue +AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + DebugLoc dl = JT->getDebugLoc(); + + // When compiling PIC, jump tables get put in the code section so a static + // relocation-style is acceptable for both cases. + return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(), + DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()), + DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), + AArch64II::MO_LO12), + DAG.getConstant(1, MVT::i32)); +} + +// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode) +SDValue +AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue IfTrue = Op.getOperand(2); + SDValue IfFalse = Op.getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons are lowered to libcalls, but slot in nicely here + // afterwards. + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (RHS.getNode() == 0) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. 
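+    //
+    // For example "x < 0x1001" has no directly encodable CMP immediate
+    // (0x1001 is neither a 12-bit value nor a 12-bit value shifted left by
+    // 12), but rewriting it as "x <= 0x1000" works, since 0x1000 does encode;
+    // that is the sort of fiddling getSelectableIntSetCC does.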
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + CmpOp, IfTrue, IfFalse, A64cc); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. + A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, + Op.getValueType(), + SetCC, IfTrue, IfFalse, A64cc); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + SetCC, IfTrue, A64SELECT_CC, A64cc); + + } + + return A64SELECT_CC; +} + +// (SELECT testbit, iftrue, iffalse) +SDValue +AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue TheBit = Op.getOperand(0); + SDValue IfTrue = Op.getOperand(1); + SDValue IfFalse = Op.getOperand(2); + + // AArch64 BooleanContents is the default UndefinedBooleanContent, which means + // that as the consumer we are responsible for ignoring rubbish in higher + // bits. + TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, + DAG.getConstant(1, MVT::i32)); + SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, + DAG.getConstant(0, TheBit.getValueType()), + DAG.getCondCode(ISD::SETNE)); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + A64CMP, IfTrue, IfFalse, + DAG.getConstant(A64CC::NE, MVT::i32)); +} + +// (SETCC lhs, rhs, condcode) +SDValue +AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + EVT VT = Op.getValueType(); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS + // for the rest of the function (some i32 or i64 values). + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, use it. + if (RHS.getNode() == 0) { + assert(LHS.getValueType() == Op.getValueType() && + "Unexpected setcc expansion!"); + return LHS; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. + SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, + CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT), + A64cc); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. 
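+  //
+  // For example SETONE ("ordered and not equal") has no single A64 condition:
+  // FPCCToA64CC hands back MI with GT as the alternative, so we build two
+  // chained selects and produce the "true" value if either condition holds.
+  // SETUEQ similarly becomes EQ with VS as its backup.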
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, + CmpOp, DAG.getConstant(1, VT), + DAG.getConstant(0, VT), A64cc); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, + DAG.getConstant(1, VT), A64SELECT_CC, A64cc); + } + + return A64SELECT_CC; +} + +SDValue +AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { + const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); + + // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes + // rather than just 8. + return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(), + Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(32, MVT::i32), 8, false, false, + MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); +} + +SDValue +AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + // The layout of the va_list struct is specified in the AArch64 Procedure Call + // Standard, section B.3. + MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + DebugLoc DL = Op.getDebugLoc(); + + SDValue Chain = Op.getOperand(0); + SDValue VAList = Op.getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + SmallVector<SDValue, 4> MemOps; + + // void *__stack at offset 0 + SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(), + getPointerTy()); + MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, + MachinePointerInfo(SV), false, false, 0)); + + // void *__gr_top at offset 8 + int GPRSize = FuncInfo->getVariadicGPRSize(); + if (GPRSize > 0) { + SDValue GRTop, GRTopAddr; + + GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(8, getPointerTy())); + + GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy()); + GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, + DAG.getConstant(GPRSize, getPointerTy())); + + MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, + MachinePointerInfo(SV, 8), + false, false, 0)); + } + + // void *__vr_top at offset 16 + int FPRSize = FuncInfo->getVariadicFPRSize(); + if (FPRSize > 0) { + SDValue VRTop, VRTopAddr; + VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(16, getPointerTy())); + + VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy()); + VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, + DAG.getConstant(FPRSize, getPointerTy())); + + MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, + MachinePointerInfo(SV, 16), + false, false, 0)); + } + + // int __gr_offs at offset 24 + SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(24, getPointerTy())); + MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32), + GROffsAddr, MachinePointerInfo(SV, 24), + false, false, 0)); + + // int __vr_offs at offset 28 + SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(28, getPointerTy())); + MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32), + VROffsAddr, 
MachinePointerInfo(SV, 28), + false, false, 0)); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], + MemOps.size()); +} + +SDValue +AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: llvm_unreachable("Don't know how to custom lower this!"); + case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128); + case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128); + case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128); + case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128); + case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true); + case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false); + case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true); + case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false); + case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::BRCOND: return LowerBRCOND(Op, DAG); + case ISD::BR_CC: return LowerBR_CC(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::VACOPY: return LowerVACOPY(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + } + + return SDValue(); +} + +static SDValue PerformANDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // We're looking for an SRA/SHL pair which form an SBFX. + + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + if (!isa<ConstantSDNode>(N->getOperand(1))) + return SDValue(); + + uint64_t TruncMask = N->getConstantOperandVal(1); + if (!isMask_64(TruncMask)) + return SDValue(); + + uint64_t Width = CountPopulation_64(TruncMask); + SDValue Shift = N->getOperand(0); + + if (Shift.getOpcode() != ISD::SRL) + return SDValue(); + + if (!isa<ConstantSDNode>(Shift->getOperand(1))) + return SDValue(); + uint64_t LSB = Shift->getConstantOperandVal(1); + + if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) + return SDValue(); + + return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0), + DAG.getConstant(LSB, MVT::i64), + DAG.getConstant(LSB + Width - 1, MVT::i64)); +} + +static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode, + TargetLowering::DAGCombinerInfo &DCI) { + // An atomic operation followed by an acquiring atomic fence can be reduced to + // an acquiring load. The atomic operation provides a convenient pointer to + // load from. If the original operation was a load anyway we can actually + // combine the two operations into an acquiring load. 
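+  // In instruction terms the goal is roughly to select the fence as a
+  // load-acquire (LDAR) from the atomic operation's address instead of a
+  // separate DMB barrier.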
+ SelectionDAG &DAG = DCI.DAG; + SDValue AtomicOp = FenceNode->getOperand(0); + AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp); + + // A fence on its own can't be optimised + if (!AtomicNode) + return SDValue(); + + AtomicOrdering FenceOrder + = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1)); + SynchronizationScope FenceScope + = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2)); + + if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope()) + return SDValue(); + + // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so + // the chain we use should be its input, otherwise we'll put our store after + // it so we use its output chain. + SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ? + AtomicNode->getChain() : AtomicOp; + + // We have an acquire fence with a handy atomic operation nearby, we can + // convert the fence into a load-acquire, discarding the result. + DebugLoc DL = FenceNode->getDebugLoc(); + SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(), + AtomicNode->getValueType(0), + Chain, // Chain + AtomicOp.getOperand(1), // Pointer + AtomicNode->getMemOperand(), Acquire, + FenceScope); + + if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD) + DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode()); + + return Op.getValue(1); +} + +static SDValue PerformATOMIC_STORECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // A releasing atomic fence followed by an atomic store can be combined into a + // single store operation. + SelectionDAG &DAG = DCI.DAG; + AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N); + SDValue FenceOp = AtomicNode->getOperand(0); + + if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE) + return SDValue(); + + AtomicOrdering FenceOrder + = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1)); + SynchronizationScope FenceScope + = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2)); + + if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope()) + return SDValue(); + + DebugLoc DL = AtomicNode->getDebugLoc(); + return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(), + FenceOp.getOperand(0), // Chain + AtomicNode->getOperand(1), // Pointer + AtomicNode->getOperand(2), // Value + AtomicNode->getMemOperand(), Release, + FenceScope); +} + +/// For a true bitfield insert, the bits getting into that contiguous mask +/// should come from the low part of an existing value: they must be formed from +/// a compatible SHL operation (unless they're already low). This function +/// checks that condition and returns the least-significant bit that's +/// intended. If the operation not a field preparation, -1 is returned. +static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT, + SDValue &MaskedVal, uint64_t Mask) { + if (!isShiftedMask_64(Mask)) + return -1; + + // Now we need to alter MaskedVal so that it is an appropriate input for a BFI + // instruction. BFI will do a left-shift by LSB before applying the mask we've + // spotted, so in general we should pre-emptively "undo" that by making sure + // the incoming bits have had a right-shift applied to them. + // + // This right shift, however, will combine with existing left/right shifts. In + // the simplest case of a completely straight bitfield operation, it will be + // expected to completely cancel out with an existing SHL. More complicated + // cases (e.g. bitfield to bitfield copy) may still need a real shift before + // the BFI. 
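+  // For example, with Mask == 0xff0 the LSB is 4: a MaskedVal of (shl X, #4)
+  // cancels exactly and X is used as-is, while (srl Y, #8) folds to
+  // (srl Y, #12), so the BFI's implicit left-shift by 4 still deposits the
+  // intended bits of Y into the field.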
+ + uint64_t LSB = CountTrailingZeros_64(Mask); + int64_t ShiftRightRequired = LSB; + if (MaskedVal.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(MaskedVal.getOperand(1))) { + ShiftRightRequired -= MaskedVal.getConstantOperandVal(1); + MaskedVal = MaskedVal.getOperand(0); + } else if (MaskedVal.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(MaskedVal.getOperand(1))) { + ShiftRightRequired += MaskedVal.getConstantOperandVal(1); + MaskedVal = MaskedVal.getOperand(0); + } + + if (ShiftRightRequired > 0) + MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal, + DAG.getConstant(ShiftRightRequired, MVT::i64)); + else if (ShiftRightRequired < 0) { + // We could actually end up with a residual left shift, for example with + // "struc.bitfield = val << 1". + MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal, + DAG.getConstant(-ShiftRightRequired, MVT::i64)); + } + + return LSB; +} + +/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by +/// a mask and an extension. Returns true if a BFI was found and provides +/// information on its surroundings. +static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask, + bool &Extended) { + Extended = false; + if (N.getOpcode() == ISD::ZERO_EXTEND) { + Extended = true; + N = N.getOperand(0); + } + + if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { + Mask = N->getConstantOperandVal(1); + N = N.getOperand(0); + } else { + // Mask is the whole width. + Mask = -1ULL >> (64 - N.getValueType().getSizeInBits()); + } + + if (N.getOpcode() == AArch64ISD::BFI) { + BFI = N; + return true; + } + + return false; +} + +/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which +/// is roughly equivalent to (and (BFI ...), mask). This form is used because it +/// can often be further combined with a larger mask. Ultimately, we want mask +/// to be 2^32-1 or 2^64-1 so the AND can be skipped. +static SDValue tryCombineToBFI(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + assert(N->getOpcode() == ISD::OR && "Unexpected root"); + + // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or + // abandon the effort. + SDValue LHS = N->getOperand(0); + if (LHS.getOpcode() != ISD::AND) + return SDValue(); + + uint64_t LHSMask; + if (isa<ConstantSDNode>(LHS.getOperand(1))) + LHSMask = LHS->getConstantOperandVal(1); + else + return SDValue(); + + // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask + // is or abandon the effort. + SDValue RHS = N->getOperand(1); + if (RHS.getOpcode() != ISD::AND) + return SDValue(); + + uint64_t RHSMask; + if (isa<ConstantSDNode>(RHS.getOperand(1))) + RHSMask = RHS->getConstantOperandVal(1); + else + return SDValue(); + + // Can't do anything if the masks are incompatible. + if (LHSMask & RHSMask) + return SDValue(); + + // Now we need one of the masks to be a contiguous field. Without loss of + // generality that should be the RHS one. + SDValue Bitfield = LHS.getOperand(0); + if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) { + // We know that LHS is a candidate new value, and RHS isn't already a better + // one. + std::swap(LHS, RHS); + std::swap(LHSMask, RHSMask); + } + + // We've done our best to put the right operands in the right places, all we + // can do now is check whether a BFI exists. 
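+  // For instance (i32): (or (and X, 0xffff0000), (and Y, 0x0000ffff)) ends up
+  // as (BFI Y, (srl X, #16), #16, #16), and since the two masks cover the
+  // whole register no trailing AND is needed.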
+ Bitfield = RHS.getOperand(0); + int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask); + if (LSB == -1) + return SDValue(); + + uint32_t Width = CountPopulation_64(RHSMask); + assert(Width && "Expected non-zero bitfield width"); + + SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, + LHS.getOperand(0), Bitfield, + DAG.getConstant(LSB, MVT::i64), + DAG.getConstant(Width, MVT::i64)); + + // Mask is trivial + if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits()))) + return BFI; + + return DAG.getNode(ISD::AND, DL, VT, BFI, + DAG.getConstant(LHSMask | RHSMask, VT)); +} + +/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its +/// original input. This is surprisingly common because SROA splits things up +/// into i8 chunks, so the originally detected MaskedBFI may actually only act +/// on the low (say) byte of a word. This is then orred into the rest of the +/// word afterwards. +/// +/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)). +/// +/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the +/// MaskedBFI. We can also deal with a certain amount of extend/truncate being +/// involved. +static SDValue tryCombineToLargerBFI(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // First job is to hunt for a MaskedBFI on either the left or right. Swap + // operands if it's actually on the right. + SDValue BFI; + SDValue PossExtraMask; + uint64_t ExistingMask = 0; + bool Extended = false; + if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended)) + PossExtraMask = N->getOperand(1); + else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended)) + PossExtraMask = N->getOperand(0); + else + return SDValue(); + + // We can only combine a BFI with another compatible mask. + if (PossExtraMask.getOpcode() != ISD::AND || + !isa<ConstantSDNode>(PossExtraMask.getOperand(1))) + return SDValue(); + + uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1); + + // Masks must be compatible. + if (ExtraMask & ExistingMask) + return SDValue(); + + SDValue OldBFIVal = BFI.getOperand(0); + SDValue NewBFIVal = BFI.getOperand(1); + if (Extended) { + // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be + // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments + // need to be made compatible. + assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32 + && "Invalid types for BFI"); + OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal); + NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal); + } + + // We need the MaskedBFI to be combined with a mask of the *same* value. + if (PossExtraMask.getOperand(0) != OldBFIVal) + return SDValue(); + + BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, + OldBFIVal, NewBFIVal, + BFI.getOperand(2), BFI.getOperand(3)); + + // If the masking is trivial, we don't need to create it. + if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits()))) + return BFI; + + return DAG.getNode(ISD::AND, DL, VT, BFI, + DAG.getConstant(ExtraMask | ExistingMask, VT)); +} + +/// An EXTR instruction is made up of two shifts, ORed together. This helper +/// searches for and classifies those shifts. 
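+/// For example, on i32 (or (shl A, #12), (srl B, #20)) can be selected as
+/// "extr wD, wA, wB, #20", because the two shift amounts sum to the register
+/// width.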
+static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, + bool &FromHi) { + if (N.getOpcode() == ISD::SHL) + FromHi = false; + else if (N.getOpcode() == ISD::SRL) + FromHi = true; + else + return false; + + if (!isa<ConstantSDNode>(N.getOperand(1))) + return false; + + ShiftAmount = N->getConstantOperandVal(1); + Src = N->getOperand(0); + return true; +} + +/// EXTR instruction extracts a contiguous chunk of bits from two existing +/// registers viewed as a high/low pair. This function looks for the pattern: +/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an +/// EXTR. Can't quite be done in TableGen because the two immediates aren't +/// independent. +static SDValue tryCombineToEXTR(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + assert(N->getOpcode() == ISD::OR && "Unexpected root"); + + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + SDValue LHS; + uint32_t ShiftLHS = 0; + bool LHSFromHi = 0; + if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) + return SDValue(); + + SDValue RHS; + uint32_t ShiftRHS = 0; + bool RHSFromHi = 0; + if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) + return SDValue(); + + // If they're both trying to come from the high part of the register, they're + // not really an EXTR. + if (LHSFromHi == RHSFromHi) + return SDValue(); + + if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) + return SDValue(); + + if (LHSFromHi) { + std::swap(LHS, RHS); + std::swap(ShiftLHS, ShiftRHS); + } + + return DAG.getNode(AArch64ISD::EXTR, DL, VT, + LHS, RHS, + DAG.getConstant(ShiftRHS, MVT::i64)); +} + +/// Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + // Attempt to recognise bitfield-insert operations. + SDValue Res = tryCombineToBFI(N, DCI, Subtarget); + if (Res.getNode()) + return Res; + + // Attempt to combine an existing MaskedBFI operation into one with a larger + // mask. + Res = tryCombineToLargerBFI(N, DCI, Subtarget); + if (Res.getNode()) + return Res; + + Res = tryCombineToEXTR(N, DCI); + if (Res.getNode()) + return Res; + + return SDValue(); +} + +/// Target-specific dag combine xforms for ISD::SRA +static SDValue PerformSRACombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // We're looking for an SRA/SHL pair which form an SBFX. 
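+  // For example (i32): (sra (shl X, #24), #28) sign-extends bits [7:4] of X,
+  // which becomes SBFX(X, #4, #7) with the operands pre-encoded as immr/imms.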
+ + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + if (!isa<ConstantSDNode>(N->getOperand(1))) + return SDValue(); + + uint64_t ExtraSignBits = N->getConstantOperandVal(1); + SDValue Shift = N->getOperand(0); + + if (Shift.getOpcode() != ISD::SHL) + return SDValue(); + + if (!isa<ConstantSDNode>(Shift->getOperand(1))) + return SDValue(); + + uint64_t BitsOnLeft = Shift->getConstantOperandVal(1); + uint64_t Width = VT.getSizeInBits() - ExtraSignBits; + uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft; + + if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) + return SDValue(); + + return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0), + DAG.getConstant(LSB, MVT::i64), + DAG.getConstant(LSB + Width - 1, MVT::i64)); +} + + +SDValue +AArch64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + default: break; + case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI); + case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI); + case ISD::OR: return PerformORCombine(N, DCI, Subtarget); + case ISD::SRA: return PerformSRACombine(N, DCI); + } + return SDValue(); +} + +AArch64TargetLowering::ConstraintType +AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'w': // An FP/SIMD vector register + return C_RegisterClass; + case 'I': // Constant that can be used with an ADD instruction + case 'J': // Constant that can be used with a SUB instruction + case 'K': // Constant that can be used with a 32-bit logical instruction + case 'L': // Constant that can be used with a 64-bit logical instruction + case 'M': // Constant that can be used as a 32-bit MOV immediate + case 'N': // Constant that can be used as a 64-bit MOV immediate + case 'Y': // Floating point constant zero + case 'Z': // Integer constant zero + return C_Other; + case 'Q': // A memory reference with base register and no offset + return C_Memory; + case 'S': // A symbolic address + return C_Other; + } + } + + // FIXME: Ump, Utf, Usa, Ush + // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, + // whatever they may be + // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be + // Usa: An absolute symbolic address + // Ush: The high part (bits 32:12) of a pc-relative symbolic address + assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa" + && Constraint != "Ush" && "Unimplemented constraints"); + + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight +AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info, + const char *Constraint) const { + + llvm_unreachable("Constraint weight unimplemented"); +} + +void +AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + SDValue Result(0, 0); + + // Only length 1 constraints are C_Other. + if (Constraint.size() != 1) return; + + // Only C_Other constraints get lowered like this. That means constants for us + // so return early if there's no hope the constraint can be lowered. 
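+  // For example, an operand such as "I"(42) in GCC-style inline assembly
+  // arrives here with constraint 'I' and a ConstantSDNode of 42, which the
+  // case below accepts because it fits a plain 12-bit ADD immediate.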
+ + switch(Constraint[0]) { + default: break; + case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'Z': { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + if (!C) + return; + + uint64_t CVal = C->getZExtValue(); + uint32_t Bits; + + switch (Constraint[0]) { + default: + // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J' + // is a peculiarly useless SUB constraint. + llvm_unreachable("Unimplemented C_Other constraint"); + case 'I': + if (CVal <= 0xfff) + break; + return; + case 'K': + if (A64Imms::isLogicalImm(32, CVal, Bits)) + break; + return; + case 'L': + if (A64Imms::isLogicalImm(64, CVal, Bits)) + break; + return; + case 'Z': + if (CVal == 0) + break; + return; + } + + Result = DAG.getTargetConstant(CVal, Op.getValueType()); + break; + } + case 'S': { + // An absolute symbolic address or label reference. + if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { + Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), + GA->getValueType(0)); + } else if (const BlockAddressSDNode *BA + = dyn_cast<BlockAddressSDNode>(Op)) { + Result = DAG.getTargetBlockAddress(BA->getBlockAddress(), + BA->getValueType(0)); + } else if (const ExternalSymbolSDNode *ES + = dyn_cast<ExternalSymbolSDNode>(Op)) { + Result = DAG.getTargetExternalSymbol(ES->getSymbol(), + ES->getValueType(0)); + } else + return; + break; + } + case 'Y': + if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) { + if (CFP->isExactlyValue(0.0)) { + Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0)); + break; + } + } + return; + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + + // It's an unknown constraint for us. Let generic code have a go. + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +std::pair<unsigned, const TargetRegisterClass*> +AArch64TargetLowering::getRegForInlineAsmConstraint( + const std::string &Constraint, + EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + if (VT.getSizeInBits() <= 32) + return std::make_pair(0U, &AArch64::GPR32RegClass); + else if (VT == MVT::i64) + return std::make_pair(0U, &AArch64::GPR64RegClass); + break; + case 'w': + if (VT == MVT::f16) + return std::make_pair(0U, &AArch64::FPR16RegClass); + else if (VT == MVT::f32) + return std::make_pair(0U, &AArch64::FPR32RegClass); + else if (VT == MVT::f64) + return std::make_pair(0U, &AArch64::FPR64RegClass); + else if (VT.getSizeInBits() == 64) + return std::make_pair(0U, &AArch64::VPR64RegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &AArch64::FPR128RegClass); + else if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &AArch64::VPR128RegClass); + break; + } + } + + // Use the default implementation in TargetLowering to convert the register + // constraint into a member of a register class. + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h new file mode 100644 index 0000000..4960d28 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -0,0 +1,247 @@ +//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that AArch64 uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H +#define LLVM_TARGET_AARCH64_ISELLOWERING_H + +#include "Utils/AArch64BaseInfo.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + + +namespace llvm { +namespace AArch64ISD { + enum NodeType { + // Start the numbering from where ISD NodeType finishes. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // This is a conditional branch which also notes the flag needed + // (eq/sgt/...). A64 puts this information on the branches rather than + // compares as LLVM does. + BR_CC, + + // A node to be selected to an actual call operation: either BL or BLR in + // the absence of tail calls. + Call, + + // Indicates a floating-point immediate which fits into the format required + // by the FMOV instructions. First (and only) operand is the 8-bit encoded + // value of that immediate. + FPMOV, + + // Corresponds directly to an EXTR instruction. Operands are an LHS an RHS + // and an LSB. + EXTR, + + // Wraps a load from the GOT, which should always be performed with a 64-bit + // load instruction. This prevents the DAG combiner folding a truncate to + // form a smaller memory access. + GOTLoad, + + // Performs a bitfield insert. Arguments are: the value being inserted into; + // the value being inserted; least significant bit changed; width of the + // field. + BFI, + + // Simply a convenient node inserted during ISelLowering to represent + // procedure return. Will almost certainly be selected to "RET". + Ret, + + /// Extracts a field of contiguous bits from the source and sign extends + /// them into a single register. Arguments are: source; immr; imms. Note + /// these are pre-encoded since DAG matching can't cope with combining LSB + /// and Width into these values itself. + SBFX, + + /// This is an A64-ification of the standard LLVM SELECT_CC operation. The + /// main difference is that it only has the values and an A64 condition, + /// which will be produced by a setcc instruction. + SELECT_CC, + + /// This serves most of the functions of the LLVM SETCC instruction, for two + /// purposes. First, it prevents optimisations from fiddling with the + /// compare after we've moved the CondCode information onto the SELECT_CC or + /// BR_CC instructions. Second, it gives a legal instruction for the actual + /// comparison. + /// + /// It keeps a record of the condition flags asked for because certain + /// instructions are only valid for a subset of condition codes. + SETCC, + + // Designates a node which is a tail call: both a call and a return + // instruction as far as selction is concerned. It should be selected to an + // unconditional branch. Has the usual plethora of call operands, but: 1st + // is callee, 2nd is stack adjustment required immediately before branch. + TC_RETURN, + + // Designates a call used to support the TLS descriptor ABI. The call itself + // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall + // var") must be attached somehow during code generation. It takes two + // operands: the callee and the symbol to be relocated against. + TLSDESCCALL, + + // Leaf node which will be lowered to an appropriate MRS to obtain the + // thread pointer: TPIDR_EL0. 
+ THREAD_POINTER, + + /// Extracts a field of contiguous bits from the source and zero extends + /// them into a single register. Arguments are: source; immr; imms. Note + /// these are pre-encoded since DAG matching can't cope with combining LSB + /// and Width into these values itself. + UBFX, + + // Wraps an address which the ISelLowering phase has decided should be + // created using the small absolute memory model: i.e. adrp/add or + // adrp/mem-op. This exists to prevent bare TargetAddresses which may never + // get selected. + WrapperSmall + }; +} + + +class AArch64Subtarget; +class AArch64TargetMachine; + +class AArch64TargetLowering : public TargetLowering { +public: + explicit AArch64TargetLowering(AArch64TargetMachine &TM); + + const char *getTargetNodeName(unsigned Opcode) const; + + CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const; + + SDValue LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + SDValue LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const; + + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const; + + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc DL, SDValue &Chain) const; + + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool IsVarArg, + bool IsCalleeStructRet, + bool IsCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; + + /// Finds the incoming stack arguments which overlap the given fixed stack + /// object and incorporates their load into the current chain. This prevents + /// an upcoming store from clobbering the stack argument before it's used. 
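+  /// (This is primarily of interest for tail calls, where outgoing arguments
+  /// are written into the caller's own incoming-argument area.)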
+ SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, + MachineFrameInfo *MFI, int ClobberedFI) const; + + EVT getSetCCResultType(EVT VT) const; + + bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; + + bool IsTailCallConvention(CallingConv::ID CallCC) const; + + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + bool isLegalICmpImmediate(int64_t Val) const; + SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + MachineBasicBlock * + emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, + unsigned Size, unsigned Opcode) const; + + MachineBasicBlock * + emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size, unsigned CmpOp, + A64CC::CondCodes Cond) const; + MachineBasicBlock * + emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size) const; + + MachineBasicBlock * + EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const; + + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; + SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL, + SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. 
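+  /// On AArch64 this is expected to hold, since FMADD/FMSUB fuse the multiply
+  /// and the add into one instruction.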
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; } + + ConstraintType getConstraintType(const std::string &Constraint) const; + + ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info, + const char *Constraint) const; + void LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const; + + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; +private: + const AArch64Subtarget *Subtarget; + const TargetRegisterInfo *RegInfo; + const InstrItineraryData *Itins; +}; +} // namespace llvm + +#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td new file mode 100644 index 0000000..cb93471 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -0,0 +1,961 @@ +//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file describes AArch64 instruction formats, down to the level of the +// instruction's overall class. +// ===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// A64 Instruction Format Definitions. +//===----------------------------------------------------------------------===// + +// A64 is currently the only instruction set supported by the AArch64 +// architecture. +class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns, + InstrItinClass itin> + : Instruction { + // All A64 instructions are 32-bit. This field will be filled in + // gradually going down the hierarchy. + field bits<32> Inst; + + field bits<32> Unpredictable = 0; + // SoftFail is the generic name for this field, but we alias it so + // as to make it more obvious what it means in ARM-land. + field bits<32> SoftFail = Unpredictable; + + // LLVM-level model of the AArch64/A64 distinction. + let Namespace = "AArch64"; + let DecoderNamespace = "A64"; + let Size = 4; + + // Set the templated fields + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asmstr; + let Pattern = patterns; + let Itinerary = itin; +} + +class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction { + let Namespace = "AArch64"; + + let OutOperandList = outs; + let InOperandList= ins; + let Pattern = patterns; + let isCodeGenOnly = 1; + let isPseudo = 1; +} + +// Represents a pseudo-instruction that represents a single A64 instruction for +// whatever reason, the eventual result will be a 32-bit real instruction. +class A64PseudoInst<dag outs, dag ins, list<dag> patterns> + : PseudoInst<outs, ins, patterns> { + let Size = 4; +} + +// As above, this will be a single A64 instruction, but we can actually give the +// expansion in TableGen. +class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result> + : A64PseudoInst<outs, ins, patterns>, + PseudoInstExpansion<Result>; + + +// First, some common cross-hierarchy register formats. 
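+// These pin down where the common register operands live in the encoding:
+// Rd or Rt in bits 4-0, Rn in 9-5, Rm in 20-16, and Rt2 in 14-10 for pairs.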
+ +class A64InstRd<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<5> Rd; + + let Inst{4-0} = Rd; +} + +class A64InstRt<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<5> Rt; + + let Inst{4-0} = Rt; +} + + +class A64InstRdn<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRd<outs, ins, asmstr, patterns, itin> { + // Inherit rdt + bits<5> Rn; + + let Inst{9-5} = Rn; +} + +class A64InstRtn<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRt<outs, ins, asmstr, patterns, itin> { + // Inherit rdt + bits<5> Rn; + + let Inst{9-5} = Rn; +} + +// Instructions taking Rt,Rt2,Rn +class A64InstRtt2n<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<5> Rt2; + + let Inst{14-10} = Rt2; +} + +class A64InstRdnm<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bits<5> Rm; + + let Inst{20-16} = Rm; +} + +//===----------------------------------------------------------------------===// +// +// Actual A64 Instruction Formats +// + +// Format for Add-subtract (extended register) instructions. +class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option, + dag outs, dag ins, string asmstr, list<dag> patterns, + InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<3> Imm3; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-24} = 0b01011; + let Inst{23-22} = opt; + let Inst{21} = 0b1; + // Rm inherited in 20-16 + let Inst{15-13} = option; + let Inst{12-10} = Imm3; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + +// Format for Add-subtract (immediate) instructions. +class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bits<12> Imm12; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-24} = 0b10001; + let Inst{23-22} = shift; + let Inst{21-10} = Imm12; +} + +// Format for Add-subtract (shifted register) instructions. +class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift, + dag outs, dag ins, string asmstr, list<dag> patterns, + InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<6> Imm6; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-24} = 0b01011; + let Inst{23-22} = shift; + let Inst{21} = 0b0; + // Rm inherited in 20-16 + let Inst{15-10} = Imm6; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + +// Format for Add-subtract (with carry) instructions. 
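+// Covers ADC, ADCS, SBC and SBCS.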
+class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2, + dag outs, dag ins, string asmstr, list<dag> patterns, + InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-21} = 0b11010000; + // Rm inherited in 20-16 + let Inst{15-10} = opcode2; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + + +// Format for Bitfield instructions +class A64I_bitfield<bit sf, bits<2> opc, bit n, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bits<6> ImmR; + bits<6> ImmS; + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100110; + let Inst{22} = n; + let Inst{21-16} = ImmR; + let Inst{15-10} = ImmS; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for compare and branch (immediate) instructions. +class A64I_cmpbr<bit sf, bit op, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRt<outs, ins, asmstr, patterns, itin> { + bits<19> Label; + + let Inst{31} = sf; + let Inst{30-25} = 0b011010; + let Inst{24} = op; + let Inst{23-5} = Label; + // Inherit Rt in 4-0 +} + +// Format for conditional branch (immediate) instructions. +class A64I_condbr<bit o1, bit o0, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<19> Label; + bits<4> Cond; + + let Inst{31-25} = 0b0101010; + let Inst{24} = o1; + let Inst{23-5} = Label; + let Inst{4} = o0; + let Inst{3-0} = Cond; +} + +// Format for conditional compare (immediate) instructions. +class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<5> Rn; + bits<5> UImm5; + bits<4> NZCVImm; + bits<4> Cond; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = s; + let Inst{28-21} = 0b11010010; + let Inst{20-16} = UImm5; + let Inst{15-12} = Cond; + let Inst{11} = 0b1; + let Inst{10} = o2; + let Inst{9-5} = Rn; + let Inst{4} = o3; + let Inst{3-0} = NZCVImm; +} + +// Format for conditional compare (register) instructions. +class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<5> Rn; + bits<5> Rm; + bits<4> NZCVImm; + bits<4> Cond; + + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = s; + let Inst{28-21} = 0b11010010; + let Inst{20-16} = Rm; + let Inst{15-12} = Cond; + let Inst{11} = 0b0; + let Inst{10} = o2; + let Inst{9-5} = Rn; + let Inst{4} = o3; + let Inst{3-0} = NZCVImm; +} + +// Format for conditional select instructions. 
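+// Covers CSEL, CSINC, CSINV and CSNEG (and aliases such as CSET and CINC).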
+class A64I_condsel<bit sf, bit op, bit s, bits<2> op2, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<4> Cond; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = s; + let Inst{28-21} = 0b11010100; + // Inherit Rm in 20-16 + let Inst{15-12} = Cond; + let Inst{11-10} = op2; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for data processing (1 source) instructions +class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode, + string asmstr, dag outs, dag ins, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + let Inst{31} = sf; + let Inst{30} = 0b1; + let Inst{29} = S; + let Inst{28-21} = 0b11010110; + let Inst{20-16} = opcode2; + let Inst{15-10} = opcode; +} + +// Format for data processing (2 source) instructions +class A64I_dp_2src<bit sf, bits<6> opcode, bit S, + string asmstr, dag outs, dag ins, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + let Inst{31} = sf; + let Inst{30} = 0b0; + let Inst{29} = S; + let Inst{28-21} = 0b11010110; + let Inst{15-10} = opcode; +} + +// Format for data-processing (3 source) instructions + +class A64I_dp3<bit sf, bits<6> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<5> Ra; + + let Inst{31} = sf; + let Inst{30-29} = opcode{5-4}; + let Inst{28-24} = 0b11011; + let Inst{23-21} = opcode{3-1}; + // Inherits Rm in 20-16 + let Inst{15} = opcode{0}; + let Inst{14-10} = Ra; + // Inherits Rn in 9-5 + // Inherits Rd in 4-0 +} + +// Format for exception generation instructions +class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<16> UImm16; + + let Inst{31-24} = 0b11010100; + let Inst{23-21} = opc; + let Inst{20-5} = UImm16; + let Inst{4-2} = op2; + let Inst{1-0} = ll; +} + +// Format for extract (immediate) instructions +class A64I_extract<bit sf, bits<3> op, bit n, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<6> LSB; + + let Inst{31} = sf; + let Inst{30-29} = op{2-1}; + let Inst{28-23} = 0b100111; + let Inst{22} = n; + let Inst{21} = op{0}; + // Inherits Rm in bits 20-16 + let Inst{15-10} = LSB; + // Inherits Rn in 9-5 + // Inherits Rd in 4-0 +} + +// Format for floating-point compare instructions. +class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-14} = op; + let Inst{13-10} = 0b1000; + let Inst{9-5} = Rn; + let Inst{4-0} = opcode2; +} + +// Format for floating-point conditional compare instructions. 
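+// Covers FCCMP and FCCMPE.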
+class A64I_fpccmp<bit m, bit s, bits<2> type, bit op, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bits<5> Rn; + bits<5> Rm; + bits<4> NZCVImm; + bits<4> Cond; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-12} = Cond; + let Inst{11-10} = 0b01; + let Inst{9-5} = Rn; + let Inst{4} = op; + let Inst{3-0} = NZCVImm; +} + +// Format for floating-point conditional select instructions. +class A64I_fpcondsel<bit m, bit s, bits<2> type, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<4> Cond; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-12} = Cond; + let Inst{11-10} = 0b11; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + + +// Format for floating-point data-processing (1 source) instructions. +class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-15} = opcode; + let Inst{14-10} = 0b10000; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point data-processing (2 sources) instructions. +class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-12} = opcode; + let Inst{11-10} = 0b10; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point data-processing (3 sources) instructions. +class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<5> Ra; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11111; + let Inst{23-22} = type; + let Inst{21} = o1; + // Inherit Rm in 20-16 + let Inst{15} = o0; + let Inst{14-10} = Ra; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point <-> fixed-point conversion instructions. +class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bits<6> Scale; + + let Inst{31} = sf; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b0; + let Inst{20-19} = mode; + let Inst{18-16} = opcode; + let Inst{15-10} = Scale; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point <-> integer conversion instructions. 
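+// Covers e.g. FCVTZS/FCVTZU, SCVTF/UCVTF and the FMOVs between general-purpose
+// and floating-point registers.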
+class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + let Inst{31} = sf; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + + +// Format for floating-point immediate instructions. +class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRd<outs, ins, asmstr, patterns, itin> { + bits<8> Imm8; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-13} = Imm8; + let Inst{12-10} = 0b100; + let Inst{9-5} = imm5; + // Inherit Rd in 4-0 +} + +// Format for load-register (literal) instructions. +class A64I_LDRlit<bits<2> opc, bit v, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRt<outs, ins, asmstr, patterns, itin> { + bits<19> Imm19; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b011; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-5} = Imm19; + // Inherit Rt in 4-0 +} + +// Format for load-store exclusive instructions. +class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list <dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + let Inst{31-30} = size; + let Inst{29-24} = 0b001000; + let Inst{23} = o2; + let Inst{22} = L; + let Inst{21} = o1; + let Inst{15} = o0; +} + +class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list <dag> patterns, InstrItinClass itin>: + A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{ + bits<5> Rt2; + let Inst{14-10} = Rt2; +} + +class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list <dag> patterns, InstrItinClass itin>: + A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{ + bits<5> Rs; + let Inst{20-16} = Rs; +} + +class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list <dag> patterns, InstrItinClass itin>: + A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{ + bits<5> Rt2; + let Inst{14-10} = Rt2; +} + +// Format for load-store register (immediate post-indexed) instructions +class A64I_LSpostind<bits<2> size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<9> SImm9; + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = SImm9; + let Inst{11-10} = 0b01; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for load-store register (immediate pre-indexed) instructions +class A64I_LSpreind<bits<2> size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<9> SImm9; + + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let 
Inst{20-12} = SImm9; + let Inst{11-10} = 0b11; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for load-store register (unprivileged) instructions +class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<9> SImm9; + + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = SImm9; + let Inst{11-10} = 0b10; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for load-store (unscaled immediate) instructions. +class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<9> SImm9; + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = SImm9; + let Inst{11-10} = 0b00; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + + +// Format for load-store (unsigned immediate) instructions. +class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<12> UImm12; + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b01; + let Inst{23-22} = opc; + let Inst{21-10} = UImm12; +} + +// Format for load-store register (register offset) instructions. +class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<5> Rm; + + // Complex operand selection needed for these instructions, so they + // need an "addr" field for encoding/decoding to be generated. 
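+  // e.g. in "ldr w0, [x1, w2, sxtw #2]" Rm is w2 and Ext carries the top bits
+  // of the extend option together with the shift flag.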
+ bits<3> Ext; + // OptionHi = Ext{2-1} + // S = Ext{0} + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-14} = Ext{2-1}; + let Inst{13} = optionlo; + let Inst{12} = Ext{0}; + let Inst{11-10} = 0b10; + // Inherits Rn in 9-5 + // Inherits Rt in 4-0 + + let AddedComplexity = 50; +} + +// Format for Load-store register pair (offset) instructions +class A64I_LSPoffset<bits<2> opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b010; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Load-store register pair (post-indexed) instructions +class A64I_LSPpostind<bits<2> opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b001; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Load-store register pair (pre-indexed) instructions +class A64I_LSPpreind<bits<2> opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b011; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Load-store non-temporal register pair (offset) instructions +class A64I_LSPnontemp<bits<2> opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b000; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Logical (immediate) instructions +class A64I_logicalimm<bit sf, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bit N; + bits<6> ImmR; + bits<6> ImmS; + + // N, ImmR and ImmS have no separate existence in any assembly syntax (or for + // selection), so we'll combine them into a single field here. 
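+  // e.g. the mask in "and w0, w1, #0x00ff00ff" travels as one 13-bit value and
+  // is only split back into N:ImmR:ImmS when the bits are placed below.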
+ bits<13> Imm; + // N = Imm{12}; + // ImmR = Imm{11-6}; + // ImmS = Imm{5-0}; + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100100; + let Inst{22} = Imm{12}; + let Inst{21-16} = Imm{11-6}; + let Inst{15-10} = Imm{5-0}; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + +// Format for Logical (shifted register) instructions +class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<6> Imm6; + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-24} = 0b01010; + let Inst{23-22} = shift; + let Inst{21} = N; + // Rm inherited + let Inst{15-10} = Imm6; + // Rn inherited + // Rd inherited +} + +// Format for Move wide (immediate) +class A64I_movw<bit sf, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRd<outs, ins, asmstr, patterns, itin> { + bits<16> UImm16; + bits<2> Shift; // Called "hw" officially + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100101; + let Inst{22-21} = Shift; + let Inst{20-5} = UImm16; + // Inherits Rd in 4-0 +} + +// Format for PC-relative addressing instructions, ADR and ADRP. +class A64I_PCADR<bit op, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRd<outs, ins, asmstr, patterns, itin> { + bits<21> Label; + + let Inst{31} = op; + let Inst{30-29} = Label{1-0}; + let Inst{28-24} = 0b10000; + let Inst{23-5} = Label{20-2}; +} + +// Format for system instructions +class A64I_system<bit l, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<2> Op0; + bits<3> Op1; + bits<4> CRn; + bits<4> CRm; + bits<3> Op2; + bits<5> Rt; + + let Inst{31-22} = 0b1101010100; + let Inst{21} = l; + let Inst{20-19} = Op0; + let Inst{18-16} = Op1; + let Inst{15-12} = CRn; + let Inst{11-8} = CRm; + let Inst{7-5} = Op2; + let Inst{4-0} = Rt; + + // These instructions can do horrible things. + let hasSideEffects = 1; +} + +// Format for unconditional branch (immediate) instructions +class A64I_Bimm<bit op, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + // Doubly special in not even sharing register fields with other + // instructions, so we create our own Rn here. + bits<26> Label; + + let Inst{31} = op; + let Inst{30-26} = 0b00101; + let Inst{25-0} = Label; +} + +// Format for Test & branch (immediate) instructions +class A64I_TBimm<bit op, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRt<outs, ins, asmstr, patterns, itin> { + // Doubly special in not even sharing register fields with other + // instructions, so we create our own Rn here. + bits<6> Imm; + bits<14> Label; + + let Inst{31} = Imm{5}; + let Inst{30-25} = 0b011011; + let Inst{24} = op; + let Inst{23-19} = Imm{4-0}; + let Inst{18-5} = Label; + // Inherit Rt in 4-0 +} + +// Format for Unconditional branch (register) instructions, including +// RET. Shares no fields with instructions further up the hierarchy +// so top-level. +class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + // Doubly special in not even sharing register fields with other + // instructions, so we create our own Rn here. 
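+  // BR, BLR and RET (plus the exception-return instructions) use this format.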
+ bits<5> Rn; + + let Inst{31-25} = 0b1101011; + let Inst{24-21} = opc; + let Inst{20-16} = op2; + let Inst{15-10} = op3; + let Inst{9-5} = Rn; + let Inst{4-0} = op4; +} + diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp new file mode 100644 index 0000000..cf3a2c3 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -0,0 +1,822 @@ +//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#include <algorithm> + +#define GET_INSTRINFO_CTOR +#include "AArch64GenInstrInfo.inc" + +using namespace llvm; + +AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) + : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), + RI(*this, STI), Subtarget(STI) {} + +void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0; + unsigned ZeroReg = 0; + if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) { + // E.g. ADD xDst, xsp, #0 (, lsl #0) + BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg) + .addReg(SrcReg) + .addImm(0); + return; + } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { + // E.g. ADD wDST, wsp, #0 (, lsl #0) + BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg) + .addReg(SrcReg) + .addImm(0); + return; + } else if (DestReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(SrcReg)); + // E.g. MSR NZCV, xDST + BuildMI(MBB, I, DL, get(AArch64::MSRix)) + .addImm(A64SysReg::NZCV) + .addReg(SrcReg); + } else if (SrcReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(DestReg)); + // E.g. 
MRS xDST, NZCV + BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg) + .addImm(A64SysReg::NZCV); + } else if (AArch64::GPR64RegClass.contains(DestReg)) { + assert(AArch64::GPR64RegClass.contains(SrcReg)); + Opc = AArch64::ORRxxx_lsl; + ZeroReg = AArch64::XZR; + } else if (AArch64::GPR32RegClass.contains(DestReg)) { + assert(AArch64::GPR32RegClass.contains(SrcReg)); + Opc = AArch64::ORRwww_lsl; + ZeroReg = AArch64::WZR; + } else if (AArch64::FPR32RegClass.contains(DestReg)) { + assert(AArch64::FPR32RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) + .addReg(SrcReg); + return; + } else if (AArch64::FPR64RegClass.contains(DestReg)) { + assert(AArch64::FPR64RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) + .addReg(SrcReg); + return; + } else if (AArch64::FPR128RegClass.contains(DestReg)) { + assert(AArch64::FPR128RegClass.contains(SrcReg)); + + // FIXME: there's no good way to do this, at least without NEON: + // + There's no single move instruction for q-registers + // + We can't create a spill slot and use normal STR/LDR because stack + // allocation has already happened + // + We can't go via X-registers with FMOV because register allocation has + // already happened. + // This may not be efficient, but at least it works. + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) + .addReg(SrcReg) + .addReg(AArch64::XSP) + .addImm(0x1ff & -16); + + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) + .addReg(AArch64::XSP, RegState::Define) + .addReg(AArch64::XSP) + .addImm(16); + return; + } else { + llvm_unreachable("Unknown register class in copyPhysReg"); + } + + // E.g. ORR xDst, xzr, xSrc, lsl #0 + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(ZeroReg) + .addReg(SrcReg) + .addImm(0); +} + +MachineInstr * +AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, + uint64_t Offset, const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0) + .addImm(Offset) + .addMetadata(MDPtr); + return &*MIB; +} + +/// Does the Opcode represent a conditional branch that we can remove and re-add +/// at the end of a basic block? +static bool isCondBranch(unsigned Opc) { + return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx || + Opc == AArch64::CBNZw || Opc == AArch64::CBNZx || + Opc == AArch64::TBZwii || Opc == AArch64::TBZxii || + Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii; +} + +/// Takes apart a given conditional branch MachineInstr (see isCondBranch), +/// setting TBB to the destination basic block and populating the Cond vector +/// with data necessary to recreate the conditional branch at a later +/// date. First element will be the opcode, and subsequent ones define the +/// conditions being branched on in an instruction-specific manner. +static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB, + SmallVectorImpl<MachineOperand> &Cond) { + switch(I->getOpcode()) { + case AArch64::Bcc: + case AArch64::CBZw: + case AArch64::CBZx: + case AArch64::CBNZw: + case AArch64::CBNZx: + // These instructions just have one predicate operand in position 0 (either + // a condition code or a register being compared). 
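+    // By convention Cond[0] holds the branch opcode itself, so InsertBranch can
+    // rebuild the instruction verbatim later; the remaining entries are copies
+    // of the predicate operands.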
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); + Cond.push_back(I->getOperand(0)); + TBB = I->getOperand(1).getMBB(); + return; + case AArch64::TBZwii: + case AArch64::TBZxii: + case AArch64::TBNZwii: + case AArch64::TBNZxii: + // These have two predicate operands: a register and a bit position. + Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); + Cond.push_back(I->getOperand(0)); + Cond.push_back(I->getOperand(1)); + TBB = I->getOperand(2).getMBB(); + return; + default: + llvm_unreachable("Unknown conditional branch to classify"); + } +} + + +bool +AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (LastOpc == AArch64::Bimm) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (isCondBranch(LastOpc)) { + classifyCondBranch(LastInst, TBB, Cond); + return false; + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If AllowModify is true and the block ends with two or more unconditional + // branches, delete all but the first unconditional branch. + if (AllowModify && LastOpc == AArch64::Bimm) { + while (SecondLastOpc == AArch64::Bimm) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + // Return now the only terminator is an unconditional branch. + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + SecondLastInst = I; + SecondLastOpc = SecondLastInst->getOpcode(); + } + } + } + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with a B and a Bcc, handle it. + if (LastOpc == AArch64::Bimm) { + if (SecondLastOpc == AArch64::Bcc) { + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc)); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (isCondBranch(SecondLastOpc)) { + classifyCondBranch(SecondLastInst, TBB, Cond); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed, so remove it. + if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. 
+ return true; +} + +bool AArch64InstrInfo::ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const { + switch (Cond[0].getImm()) { + case AArch64::Bcc: { + A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm()); + CC = A64InvertCondCode(CC); + Cond[1].setImm(CC); + return false; + } + case AArch64::CBZw: + Cond[0].setImm(AArch64::CBNZw); + return false; + case AArch64::CBZx: + Cond[0].setImm(AArch64::CBNZx); + return false; + case AArch64::CBNZw: + Cond[0].setImm(AArch64::CBZw); + return false; + case AArch64::CBNZx: + Cond[0].setImm(AArch64::CBZx); + return false; + case AArch64::TBZwii: + Cond[0].setImm(AArch64::TBNZwii); + return false; + case AArch64::TBZxii: + Cond[0].setImm(AArch64::TBNZxii); + return false; + case AArch64::TBNZwii: + Cond[0].setImm(AArch64::TBZwii); + return false; + case AArch64::TBNZxii: + Cond[0].setImm(AArch64::TBZxii); + return false; + default: + llvm_unreachable("Unknown branch type"); + } +} + + +unsigned +AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + if (FBB == 0 && Cond.empty()) { + BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB); + return 1; + } else if (FBB == 0) { + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + for (int i = 1, e = Cond.size(); i != e; ++i) + MIB.addOperand(Cond[i]); + MIB.addMBB(TBB); + return 1; + } + + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + for (int i = 1, e = Cond.size(); i != e; ++i) + MIB.addOperand(Cond[i]); + MIB.addMBB(TBB); + + BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB); + return 2; +} + +unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } + if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode())) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (!isCondBranch(I->getOpcode())) + return 1; + + // Remove the branch. 
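+  // This is the conditional branch that preceded the unconditional branch
+  // removed above, so two terminators have gone in total.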
+ I->eraseFromParent(); + return 2; +} + +bool +AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case AArch64::TLSDESC_BLRx: { + MachineInstr *NewMI = + BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL)) + .addOperand(MI.getOperand(1)); + MI.setDesc(get(AArch64::BLRx)); + + llvm::finalizeBundle(MBB, NewMI, *++MBBI); + return true; + } + default: + return false; + } + + return false; +} + +void +AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, + int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(MBBI); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FrameIdx); + + MachineMemOperand *MMO + = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOStore, + MFI.getObjectSize(FrameIdx), + Align); + + unsigned StoreOp = 0; + if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) { + switch(RC->getSize()) { + case 4: StoreOp = AArch64::LS32_STR; break; + case 8: StoreOp = AArch64::LS64_STR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } else { + assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) || + RC->hasType(MVT::f128)) + && "Expected integer or floating type for store"); + switch (RC->getSize()) { + case 4: StoreOp = AArch64::LSFP32_STR; break; + case 8: StoreOp = AArch64::LSFP64_STR; break; + case 16: StoreOp = AArch64::LSFP128_STR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } + + MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp)); + NewMI.addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FrameIdx) + .addImm(0) + .addMemOperand(MMO); + +} + +void +AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(MBBI); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FrameIdx); + + MachineMemOperand *MMO + = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FrameIdx), + Align); + + unsigned LoadOp = 0; + if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) { + switch(RC->getSize()) { + case 4: LoadOp = AArch64::LS32_LDR; break; + case 8: LoadOp = AArch64::LS64_LDR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } else { + assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) + || RC->hasType(MVT::f128)) + && "Expected integer or floating type for store"); + switch (RC->getSize()) { + case 4: LoadOp = AArch64::LSFP32_LDR; break; + case 8: LoadOp = AArch64::LSFP64_LDR; break; + case 16: LoadOp = AArch64::LSFP128_LDR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } + + MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg); + NewMI.addFrameIndex(FrameIdx) + .addImm(0) + .addMemOperand(MMO); +} + +unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const { + unsigned Limit = (1 << 16) - 1; + for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { + for 
(MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (!I->getOperand(i).isFI()) continue; + + // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff + // is the largest offset guaranteed to fit in the immediate offset. + if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) { + Limit = std::min(Limit, 0xfffu); + break; + } + + int AccessScale, MinOffset, MaxOffset; + getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset); + Limit = std::min(Limit, static_cast<unsigned>(MaxOffset)); + + break; // At most one FI per instruction + } + } + } + + return Limit; +} +void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI, + int &AccessScale, int &MinOffset, + int &MaxOffset) const { + switch (MI.getOpcode()) { + default: llvm_unreachable("Unkown load/store kind"); + case TargetOpcode::DBG_VALUE: + AccessScale = 1; + MinOffset = INT_MIN; + MaxOffset = INT_MAX; + return; + case AArch64::LS8_LDR: case AArch64::LS8_STR: + case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR: + case AArch64::LDRSBw: + case AArch64::LDRSBx: + AccessScale = 1; + MinOffset = 0; + MaxOffset = 0xfff; + return; + case AArch64::LS16_LDR: case AArch64::LS16_STR: + case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR: + case AArch64::LDRSHw: + case AArch64::LDRSHx: + AccessScale = 2; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LS32_LDR: case AArch64::LS32_STR: + case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR: + case AArch64::LDRSWx: + case AArch64::LDPSWx: + AccessScale = 4; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LS64_LDR: case AArch64::LS64_STR: + case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR: + case AArch64::PRFM: + AccessScale = 8; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR: + AccessScale = 16; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR: + case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR: + AccessScale = 4; + MinOffset = -0x40 * AccessScale; + MaxOffset = 0x3f * AccessScale; + return; + case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR: + case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR: + AccessScale = 8; + MinOffset = -0x40 * AccessScale; + MaxOffset = 0x3f * AccessScale; + return; + case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR: + AccessScale = 16; + MinOffset = -0x40 * AccessScale; + MaxOffset = 0x3f * AccessScale; + return; + } +} + +unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + const MCInstrDesc &MCID = MI.getDesc(); + const MachineBasicBlock &MBB = *MI.getParent(); + const MachineFunction &MF = *MBB.getParent(); + const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo(); + + if (MCID.getSize()) + return MCID.getSize(); + + if (MI.getOpcode() == AArch64::INLINEASM) + return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI); + + if (MI.isLabel()) + return 0; + + switch (MI.getOpcode()) { + case TargetOpcode::BUNDLE: + return getInstBundleLength(MI); + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: + return 0; + case AArch64::TLSDESCCALL: + return 0; + default: + llvm_unreachable("Unknown instruction class"); + } +} + +unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const 
{ + unsigned Size = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + assert(!I->isBundle() && "No nested bundle!"); + Size += getInstSizeInBytes(*I); + } + return Size; +} + +bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const AArch64InstrInfo &TII) { + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MFI.getObjectOffset(FrameRegIdx); + llvm_unreachable("Unimplemented rewriteFrameIndex"); +} + +void llvm::emitRegUpdate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, + int64_t NumBytes, MachineInstr::MIFlag MIFlags) { + if (NumBytes == 0 && DstReg == SrcReg) + return; + else if (abs64(NumBytes) & ~0xffffff) { + // Generically, we have to materialize the offset into a temporary register + // and subtract it. There are a couple of ways this could be done, for now + // we'll use a movz/movk or movn/movk sequence. + uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes)); + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg) + .addImm(0xffff & Bits).addImm(0) + .setMIFlags(MIFlags); + + Bits >>= 16; + if (Bits & 0xffff) { + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) + .addReg(ScratchReg) + .addImm(0xffff & Bits).addImm(1) + .setMIFlags(MIFlags); + } + + Bits >>= 16; + if (Bits & 0xffff) { + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) + .addReg(ScratchReg) + .addImm(0xffff & Bits).addImm(2) + .setMIFlags(MIFlags); + } + + Bits >>= 16; + if (Bits & 0xffff) { + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) + .addReg(ScratchReg) + .addImm(0xffff & Bits).addImm(3) + .setMIFlags(MIFlags); + } + + // ADD DST, SRC, xTMP (, lsl #0) + unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx; + BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg) + .addReg(SrcReg, RegState::Kill) + .addReg(ScratchReg, RegState::Kill) + .addImm(0) + .setMIFlag(MIFlags); + return; + } + + // Now we know that the adjustment can be done in at most two add/sub + // (immediate) instructions, which is always more efficient than a + // literal-pool load, or even a hypothetical movz/movk/add sequence + + // Decide whether we're doing addition or subtraction + unsigned LowOp, HighOp; + if (NumBytes >= 0) { + LowOp = AArch64::ADDxxi_lsl0_s; + HighOp = AArch64::ADDxxi_lsl12_s; + } else { + LowOp = AArch64::SUBxxi_lsl0_s; + HighOp = AArch64::SUBxxi_lsl12_s; + NumBytes = abs64(NumBytes); + } + + // If we're here, at the very least a move needs to be produced, which just + // happens to be materializable by an ADD. + if ((NumBytes & 0xfff) || NumBytes == 0) { + BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(NumBytes & 0xfff) + .setMIFlag(MIFlags); + + // Next update should use the register we've just defined. 
+ SrcReg = DstReg; + } + + if (NumBytes & 0xfff000) { + BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(NumBytes >> 12) + .setMIFlag(MIFlags); + } +} + +void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned ScratchReg, int64_t NumBytes, + MachineInstr::MIFlag MIFlags) { + emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16, + NumBytes, MIFlags); +} + + +namespace { + struct LDTLSCleanup : public MachineFunctionPass { + static char ID; + LDTLSCleanup() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + AArch64MachineFunctionInfo* MFI + = MF.getInfo<AArch64MachineFunctionInfo>(); + if (MFI->getNumLocalDynamicTLSAccesses() < 2) { + // No point folding accesses if there isn't at least two. + return false; + } + + MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); + return VisitNode(DT->getRootNode(), 0); + } + + // Visit the dominator subtree rooted at Node in pre-order. + // If TLSBaseAddrReg is non-null, then use that to replace any + // TLS_base_addr instructions. Otherwise, create the register + // when the first such instruction is seen, and then use it + // as we encounter more instructions. + bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { + MachineBasicBlock *BB = Node->getBlock(); + bool Changed = false; + + // Traverse the current block. + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; + ++I) { + switch (I->getOpcode()) { + case AArch64::TLSDESC_BLRx: + // Make sure it's a local dynamic access. + if (!I->getOperand(1).isSymbol() || + strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) + break; + + if (TLSBaseAddrReg) + I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg); + else + I = SetRegister(I, &TLSBaseAddrReg); + Changed = true; + break; + default: + break; + } + } + + // Visit the children of this block in the dominator tree. + for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end(); + I != E; ++I) { + Changed |= VisitNode(*I, TLSBaseAddrReg); + } + + return Changed; + } + + // Replace the TLS_base_addr instruction I with a copy from + // TLSBaseAddrReg, returning the new instruction. + MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I, + unsigned TLSBaseAddrReg) { + MachineFunction *MF = I->getParent()->getParent(); + const AArch64TargetMachine *TM = + static_cast<const AArch64TargetMachine *>(&MF->getTarget()); + const AArch64InstrInfo *TII = TM->getInstrInfo(); + + // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the + // code sequence assumes the address will be. + MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + AArch64::X0) + .addReg(TLSBaseAddrReg); + + // Erase the TLS_base_addr instruction. + I->eraseFromParent(); + + return Copy; + } + + // Create a virtal register in *TLSBaseAddrReg, and populate it by + // inserting a copy instruction after I. Returns the new instruction. + MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { + MachineFunction *MF = I->getParent()->getParent(); + const AArch64TargetMachine *TM = + static_cast<const AArch64TargetMachine *>(&MF->getTarget()); + const AArch64InstrInfo *TII = TM->getInstrInfo(); + + // Create a virtual register for the TLS base address. 
+ MachineRegisterInfo &RegInfo = MF->getRegInfo(); + *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass); + + // Insert a copy from X0 to TLSBaseAddrReg for later. + MachineInstr *Next = I->getNextNode(); + MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + *TLSBaseAddrReg) + .addReg(AArch64::X0); + + return Copy; + } + + virtual const char *getPassName() const { + return "Local Dynamic TLS Access Clean-up"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char LDTLSCleanup::ID = 0; +FunctionPass* +llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h new file mode 100644 index 0000000..22a2ab4 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -0,0 +1,112 @@ +//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64INSTRINFO_H +#define LLVM_TARGET_AARCH64INSTRINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "AArch64RegisterInfo.h" + +#define GET_INSTRINFO_HEADER +#include "AArch64GenInstrInfo.inc" + +namespace llvm { + +class AArch64Subtarget; + +class AArch64InstrInfo : public AArch64GenInstrInfo { + const AArch64RegisterInfo RI; + const AArch64Subtarget &Subtarget; +public: + explicit AArch64InstrInfo(const AArch64Subtarget &TM); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). 
+ /// + const TargetRegisterInfo &getRegisterInfo() const { return RI; } + + const AArch64Subtarget &getSubTarget() const { return Subtarget; } + + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, + uint64_t Offset, const MDNode *MDPtr, + DebugLoc DL) const; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + unsigned RemoveBranch(MachineBasicBlock &MBB) const; + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + /// Look through the instructions in this function and work out the largest + /// the stack frame can be while maintaining the ability to address local + /// slots with no complexities. + unsigned estimateRSStackLimit(MachineFunction &MF) const; + + /// getAddressConstraints - For loads and stores (and PRFMs) taking an + /// immediate offset, this function determines the constraints required for + /// the immediate. It must satisfy: + /// + MinOffset <= imm <= MaxOffset + /// + imm % OffsetScale == 0 + void getAddressConstraints(const MachineInstr &MI, int &AccessScale, + int &MinOffset, int &MaxOffset) const; + + + unsigned getInstSizeInBytes(const MachineInstr &MI) const; + + unsigned getInstBundleLength(const MachineInstr &MI) const; + +}; + +bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const AArch64InstrInfo &TII); + + +void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, + int64_t NumBytes, + MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags); + +void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned ScratchReg, int64_t NumBytes, + MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags); + +} + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td new file mode 100644 index 0000000..37be5e4 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -0,0 +1,5099 @@ +//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the AArch64 scalar instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +include "AArch64InstrFormats.td" + +//===----------------------------------------------------------------------===// +// Target-specific ISD nodes and profiles +//===----------------------------------------------------------------------===// + +def SDT_A64ret : SDTypeProfile<0, 0, []>; +def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain, + SDNPOptInGlue, + SDNPVariadic]>; + +// (ins NZCV, Condition, Dest) +def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>; +def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>; + +// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition) +def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<2, 3>]>; +def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>; + +// (outs NZCV), (ins LHS, RHS, Condition) +def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>]>; +def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>; + + +// (outs GPR64), (ins) +def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; + +// A64 compares don't care about the cond really (they set all flags) so a +// simple binary operator is useful. +def A64cmp : PatFrag<(ops node:$lhs, node:$rhs), + (A64setcc node:$lhs, node:$rhs, cond)>; + + +// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN +// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C +// and V flags can be set differently by this operation. It comes down to +// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are +// then everything is fine. If not then the optimization is wrong. Thus general +// comparisons are only valid if op2 != 0. + +// So, finally, the only LLVM-native comparisons that don't mention C and V are +// SETEQ and SETNE. They're the only ones we can safely use CMN for in the +// absence of information about op2. +def equality_cond : PatLeaf<(cond), [{ + return N->get() == ISD::SETEQ || N->get() == ISD::SETNE; +}]>; + +def A64cmn : PatFrag<(ops node:$lhs, node:$rhs), + (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>; + +// There are two layers of indirection here, driven by the following +// considerations. +// + TableGen does not know CodeModel or Reloc so that decision should be +// made for a variable/address at ISelLowering. +// + The output of ISelLowering should be selectable (hence the Wrapper, +// rather than a bare target opcode) +def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>, + SDTCisPtrTy<0>]>; + +def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>; + + +def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; +def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad, + [SDNPHasChain]>; + + +// (A64BFI LHS, RHS, LSB, Width) +def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i64>, + SDTCisVT<4, i64>]>; + +def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>; + +// (A64EXTR HiReg, LoReg, LSB) +def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, + SDTCisVT<3, i64>]>; +def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; + +// (A64[SU]BFX Field, ImmR, ImmS). +// +// Note that ImmR and ImmS are already encoded for the actual instructions. 
The +// more natural LSB and Width mix together to form ImmR and ImmS, something +// which TableGen can't handle. +def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; +def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>; + +def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>; + +//===----------------------------------------------------------------------===// +// Call sequence pseudo-instructions +//===----------------------------------------------------------------------===// + + +def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// The TLSDESCCALL node is a variant call which goes to an indirectly calculated +// destination but needs a relocation against a fixed symbol. As such it has two +// certain operands: the callee and the relocated variable. +// +// The TLS ABI only allows it to be selected to a BLR instructin (with +// appropriate relocation). +def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; + +def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; + + +def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>; +def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; + +def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>; +def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + + + +// These pseudo-instructions have special semantics by virtue of being passed to +// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by +// LowerCall to (in our case) tell the back-end about stack adjustments for +// arguments passed on the stack. Here we select those markers to +// pseudo-instructions which explicitly set the stack, and finally in the +// RegisterInfo we convert them to a true stack adjustment. 
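+// As a rough illustration: a call passing 16 bytes of arguments on the stack
+// gets bracketed by ADJCALLSTACKDOWN 16 and ADJCALLSTACKUP 16, 0, and frame
+// lowering later rewrites those markers into SP updates (e.g. sub sp, sp, #16)
+// or drops them when a reserved call frame already covers the space.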
+let Defs = [XSP], Uses = [XSP] in { + def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt), + [(AArch64callseq_start timm:$amt)]>; + + def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; +} + +//===----------------------------------------------------------------------===// +// Atomic operation pseudo-instructions +//===----------------------------------------------------------------------===// + +let usesCustomInserter = 1 in { +multiclass AtomicSizes<string opname> { + def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>; + def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>; + def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>; + def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr), + [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>; +} +} + +defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">; +defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">; +defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">; +defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">; +defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">; +defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">; +defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">; +let Defs = [NZCV] in { + // These operations need a CMP to calculate the correct value + defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">; + defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">; + defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">; + defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">; +} + +let usesCustomInserter = 1, Defs = [NZCV] in { +def ATOMIC_CMP_SWAP_I8 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>; +def ATOMIC_CMP_SWAP_I16 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>; +def ATOMIC_CMP_SWAP_I32 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>; +def ATOMIC_CMP_SWAP_I64 + : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new), + [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>; +} + +//===----------------------------------------------------------------------===// +// Add-subtract (extended register) instructions +//===----------------------------------------------------------------------===// +// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP + +// The RHS of these operations is conceptually a sign/zero-extended +// register, optionally shifted left by 1-4. The extension can be a +// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but +// must be specified with one exception: + +// If one of the registers is sp/wsp then LSL is an alias for UXTW in +// 32-bit instructions and UXTX in 64-bit versions, the shift amount +// is not optional in that case (but can explicitly be 0), and the +// entire suffix can be skipped (e.g. "add sp, x3, x2"). 
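+// For instance, "add x0, x1, w2, sxtw #2" computes
+// x0 = x1 + (sign_extend(w2) << 2), while "add sp, x3, x2" is shorthand for
+// "add sp, x3, x2, uxtx #0" because one of the registers involved is the stack
+// pointer.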
+ +multiclass extend_operands<string PREFIX, string Diag> { + def _asmoperand : AsmOperandClass { + let Name = PREFIX; + let RenderMethod = "addRegExtendOperands"; + let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">"; + let DiagnosticType = "AddSubRegExtend" # Diag; + } + + def _operand : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> { + let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">"; + let DecoderMethod = "DecodeRegExtendOperand"; + let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand"); + } +} + +defm UXTB : extend_operands<"UXTB", "Small">; +defm UXTH : extend_operands<"UXTH", "Small">; +defm UXTW : extend_operands<"UXTW", "Small">; +defm UXTX : extend_operands<"UXTX", "Large">; +defm SXTB : extend_operands<"SXTB", "Small">; +defm SXTH : extend_operands<"SXTH", "Small">; +defm SXTW : extend_operands<"SXTW", "Small">; +defm SXTX : extend_operands<"SXTX", "Large">; + +def LSL_extasmoperand : AsmOperandClass { + let Name = "RegExtendLSL"; + let RenderMethod = "addRegExtendOperands"; + let DiagnosticType = "AddSubRegExtendLarge"; +} + +def LSL_extoperand : Operand<i64> { + let ParserMatchClass = LSL_extasmoperand; +} + + +// The patterns for various sign-extensions are a little ugly and +// non-uniform because everything has already been promoted to the +// legal i64 and i32 types. We'll wrap the various variants up in a +// class for use later. +class extend_types { + dag uxtb; dag uxth; dag uxtw; dag uxtx; + dag sxtb; dag sxth; dag sxtw; dag sxtx; + ValueType ty; + RegisterClass GPR; +} + +def extends_to_i64 : extend_types { + let uxtb = (and (anyext i32:$Rm), 255); + let uxth = (and (anyext i32:$Rm), 65535); + let uxtw = (zext i32:$Rm); + let uxtx = (i64 $Rm); + + let sxtb = (sext_inreg (anyext i32:$Rm), i8); + let sxth = (sext_inreg (anyext i32:$Rm), i16); + let sxtw = (sext i32:$Rm); + let sxtx = (i64 $Rm); + + let ty = i64; + let GPR = GPR64xsp; +} + + +def extends_to_i32 : extend_types { + let uxtb = (and i32:$Rm, 255); + let uxth = (and i32:$Rm, 65535); + let uxtw = (i32 i32:$Rm); + let uxtx = (i32 i32:$Rm); + + let sxtb = (sext_inreg i32:$Rm, i8); + let sxth = (sext_inreg i32:$Rm, i16); + let sxtw = (i32 i32:$Rm); + let sxtx = (i32 i32:$Rm); + + let ty = i32; + let GPR = GPR32wsp; +} + +// Now, six of the extensions supported are easy and uniform: if the source size +// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate +// those instructions in one block. + +// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me: +// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would +// be impossible. +// + Patterns are very different as well. +// + Passing different registers would be ugly (more fields in extend_types +// would probably be the best option). 
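+// Each defm further down therefore pairs addsub_exts (the six uniform forms)
+// with addsub_xxtx or addsub_wxtx so that the uxtx/sxtx encodings are still
+// covered.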
+multiclass addsub_exts<bit sf, bit op, bit S, string asmop, + SDPatternOperator opfrag, + dag outs, extend_types exts> { + def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000, + outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))], + NoItinerary>; + def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001, + outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))], + NoItinerary>; + def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010, + outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))], + NoItinerary>; + + def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100, + outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))], + NoItinerary>; + def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101, + outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))], + NoItinerary>; + def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110, + outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))], + NoItinerary>; +} + +// These two could be merge in with the above, but their patterns aren't really +// necessary and the naming-scheme would necessarily break: +multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag, + dag outs> { + def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011, + outs, + (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))], + NoItinerary>; + + def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111, + outs, + (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No Pattern: same as uxtx */], + NoItinerary>; +} + +multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> { + def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011, + outs, + (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No pattern: probably same as uxtw */], + NoItinerary>; + + def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111, + outs, + (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No Pattern: probably same as uxtw */], + NoItinerary>; +} + +class SetRD<RegisterClass RC, SDPatternOperator op> + : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>; +class SetNZCV<SDPatternOperator op> + : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>; + +defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>, + (outs GPR64xsp:$Rd), extends_to_i64>, + addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>, + (outs GPR64xsp:$Rd)>; +defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>, + (outs GPR32wsp:$Rd), extends_to_i32>, + addsub_wxtx< 0b0, 0b0, "add\t$Rd, ", + (outs GPR32wsp:$Rd)>; +defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>, + (outs GPR64xsp:$Rd), extends_to_i64>, + addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>, + (outs GPR64xsp:$Rd)>; +defm SUBww 
:addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>, + (outs GPR32wsp:$Rd), extends_to_i32>, + addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ", + (outs GPR32wsp:$Rd)>; + +let Defs = [NZCV] in { +defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>, + (outs GPR64:$Rd), extends_to_i64>, + addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>, + (outs GPR64:$Rd)>; +defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>, + (outs GPR32:$Rd), extends_to_i32>, + addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ", + (outs GPR32:$Rd)>; +defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>, + (outs GPR64:$Rd), extends_to_i64>, + addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>, + (outs GPR64:$Rd)>; +defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>, + (outs GPR32:$Rd), extends_to_i32>, + addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ", + (outs GPR32:$Rd)>; + + +let Rd = 0b11111, isCompare = 1 in { +defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, + (outs), extends_to_i64>, + addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>; +defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, + (outs), extends_to_i32>, + addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>; +defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, + (outs), extends_to_i64>, + addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>; +defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, + (outs), extends_to_i32>, + addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>; +} +} + +// Now patterns for the operation without a shift being needed. No patterns are +// created for uxtx/sxtx since they're non-uniform and it's expected that +// add/sub (shifted register) will handle those cases anyway. +multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop, + extend_types exts> { + def : Pat<(nodeop exts.ty:$Rn, exts.uxtb), + (!cast<Instruction>(prefix # "w_uxtb") $Rn, $Rm, 0)>; + def : Pat<(nodeop exts.ty:$Rn, exts.uxth), + (!cast<Instruction>(prefix # "w_uxth") $Rn, $Rm, 0)>; + def : Pat<(nodeop exts.ty:$Rn, exts.uxtw), + (!cast<Instruction>(prefix # "w_uxtw") $Rn, $Rm, 0)>; + + def : Pat<(nodeop exts.ty:$Rn, exts.sxtb), + (!cast<Instruction>(prefix # "w_sxtb") $Rn, $Rm, 0)>; + def : Pat<(nodeop exts.ty:$Rn, exts.sxth), + (!cast<Instruction>(prefix # "w_sxth") $Rn, $Rm, 0)>; + def : Pat<(nodeop exts.ty:$Rn, exts.sxtw), + (!cast<Instruction>(prefix # "w_sxtw") $Rn, $Rm, 0)>; +} + +defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>; +defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>; +defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>; +defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>; + +defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>; +defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>; +defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>; +defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>; + +// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is +// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the +// operation. Also permitted in this case is complete omission of the argument, +// which implies "lsl #0". 
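+// For example, "add sp, x0, x1, lsl #3" is the same encoding as
+// "add sp, x0, x1, uxtx #3", and a bare "add sp, x0, x1" implies "lsl #0".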
+multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd, + RegisterClass GPR_Rn, RegisterClass GPR_Rm> { + def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"), + (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>; + + def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"), + (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>; + +} + +defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>; +defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>; +defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>; +defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>; +defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>; +defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>; +defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>; +defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>; + +// Rd cannot be sp for flag-setting variants so only half of the aliases are +// needed. +defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>; +defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>; +defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>; +defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>; + +// CMP unfortunately has to be different because the instruction doesn't have a +// dest register. +multiclass cmp_lsl_aliases<string asmop, Instruction inst, + RegisterClass GPR_Rn, RegisterClass GPR_Rm> { + def : InstAlias<!strconcat(asmop, " $Rn, $Rm"), + (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>; + + def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"), + (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>; +} + +defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>; +defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>; +defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>; +defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>; + +//===----------------------------------------------------------------------===// +// Add-subtract (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV + +// These instructions accept a 12-bit unsigned immediate, optionally shifted +// left by 12 bits. Official assembly format specifies a 12 bit immediate with +// one of "", "LSL #0", "LSL #12" supplementary operands. + +// There are surprisingly few ways to make this work with TableGen, so this +// implementation has separate instructions for the "LSL #0" and "LSL #12" +// variants. + +// If the MCInst retained a single combined immediate (which could be 0x123000, +// for example) then both components (imm & shift) would have to be delegated to +// a single assembly operand. This would entail a separate operand parser +// (because the LSL would have to live in the same AArch64Operand as the +// immediate to be accessible); assembly parsing is rather complex and +// error-prone C++ code. +// +// By splitting the immediate, we can delegate handling this optional operand to +// an InstAlias. Supporting functions to generate the correct MCInst are still +// required, but these are essentially trivial and parsing can remain generic. +// +// Rejected plans with rationale: +// ------------------------------ +// +// In an ideal world you'de have two first class immediate operands (in +// InOperandList, specifying imm12 and shift). Unfortunately this is not +// selectable by any means I could discover. 
+// +// An Instruction with two MCOperands hidden behind a single entry in +// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional, +// but required more C++ code to handle encoding/decoding. Parsing (the intended +// main beneficiary) ended up equally complex because of the optional nature of +// "LSL #0". +// +// Attempting to circumvent the need for a custom OperandParser above by giving +// InstAliases without the "lsl #0" failed. add/sub could be accommodated but +// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands +// should be parsed: there was no way to accommodate an "lsl #12". + +let ParserMethod = "ParseImmWithLSLOperand", + RenderMethod = "addImmWithLSLOperands" in { + // Derived PredicateMethod fields are different for each + def addsubimm_lsl0_asmoperand : AsmOperandClass { + let Name = "AddSubImmLSL0"; + // If an error is reported against this operand, instruction could also be a + // register variant. + let DiagnosticType = "AddSubSecondSource"; + } + + def addsubimm_lsl12_asmoperand : AsmOperandClass { + let Name = "AddSubImmLSL12"; + let DiagnosticType = "AddSubSecondSource"; + } +} + +def shr_12_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32); +}]>; + +def shr_12_neg_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32); +}]>; + +def neg_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32); +}]>; + + +multiclass addsub_imm_operands<ValueType ty> { + let PrintMethod = "printAddSubImmLSL0Operand", + EncoderMethod = "getAddSubImmOpValue", + ParserMatchClass = addsubimm_lsl0_asmoperand in { + def _posimm_lsl0 : Operand<ty>, + ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>; + def _negimm_lsl0 : Operand<ty>, + ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }], + neg_XFORM>; + } + + let PrintMethod = "printAddSubImmLSL12Operand", + EncoderMethod = "getAddSubImmOpValue", + ParserMatchClass = addsubimm_lsl12_asmoperand in { + def _posimm_lsl12 : Operand<ty>, + ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }], + shr_12_XFORM>; + + def _negimm_lsl12 : Operand<ty>, + ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }], + shr_12_neg_XFORM>; + } +} + +// The add operands don't need any transformation +defm addsubimm_operand_i32 : addsub_imm_operands<i32>; +defm addsubimm_operand_i64 : addsub_imm_operands<i64>; + +multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift, + string asmop, string cmpasmop, + Operand imm_operand, Operand cmp_imm_operand, + RegisterClass GPR, RegisterClass GPRsp, + AArch64Reg ZR, ValueType Ty> { + // All registers for non-S variants allow SP + def _s : A64I_addsubimm<sf, op, 0b0, shift, + (outs GPRsp:$Rd), + (ins GPRsp:$Rn, imm_operand:$Imm12), + !strconcat(asmop, "\t$Rd, $Rn, $Imm12"), + [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))], + NoItinerary>; + + + // S variants can read SP but would write to ZR + def _S : A64I_addsubimm<sf, op, 0b1, shift, + (outs GPR:$Rd), + (ins GPRsp:$Rn, imm_operand:$Imm12), + !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"), + [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))], + NoItinerary> { + let Defs = [NZCV]; + } + + // Note that the pattern here for ADDS is subtle. Canonically CMP + // a, b becomes SUBS a, b. If b < 0 then this is equivalent to + // ADDS a, (-b). This is not true in general. 
+ def _cmp : A64I_addsubimm<sf, op, 0b1, shift, + (outs), (ins GPRsp:$Rn, imm_operand:$Imm12), + !strconcat(cmpasmop, " $Rn, $Imm12"), + [(set NZCV, + (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))], + NoItinerary> { + let Rd = 0b11111; + let Defs = [NZCV]; + let isCompare = 1; + } +} + + +multiclass addsubimm_shifts<string prefix, bit sf, bit op, + string asmop, string cmpasmop, string operand, string cmpoperand, + RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR, + ValueType Ty> { + defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00, + asmop, cmpasmop, + !cast<Operand>(operand # "_lsl0"), + !cast<Operand>(cmpoperand # "_lsl0"), + GPR, GPRsp, ZR, Ty>; + + defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01, + asmop, cmpasmop, + !cast<Operand>(operand # "_lsl12"), + !cast<Operand>(cmpoperand # "_lsl12"), + GPR, GPRsp, ZR, Ty>; +} + +defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn", + "addsubimm_operand_i32_posimm", + "addsubimm_operand_i32_negimm", + GPR32, GPR32wsp, WZR, i32>; +defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn", + "addsubimm_operand_i64_posimm", + "addsubimm_operand_i64_negimm", + GPR64, GPR64xsp, XZR, i64>; +defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp", + "addsubimm_operand_i32_negimm", + "addsubimm_operand_i32_posimm", + GPR32, GPR32wsp, WZR, i32>; +defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp", + "addsubimm_operand_i64_negimm", + "addsubimm_operand_i64_posimm", + GPR64, GPR64xsp, XZR, i64>; + +multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> { + def _fromsp : InstAlias<"mov $Rd, $Rn", + (addop GPRsp:$Rd, SP:$Rn, 0), + 0b1>; + + def _tosp : InstAlias<"mov $Rd, $Rn", + (addop SP:$Rd, GPRsp:$Rn, 0), + 0b1>; +} + +// Recall Rxsp is a RegisterClass containing *just* xsp. +defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>; +defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>; + +//===----------------------------------------------------------------------===// +// Add-subtract (shifted register) instructions +//===----------------------------------------------------------------------===// +// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS + +//===------------------------------- +// 1. The "shifed register" operands. Shared with logical insts. +//===------------------------------- + +multiclass shift_operands<string prefix, string form> { + def _asmoperand_i32 : AsmOperandClass { + let Name = "Shift" # form # "i32"; + let RenderMethod = "addShiftOperands"; + let PredicateMethod = "isShift<A64SE::" # form # ", false>"; + let DiagnosticType = "AddSubRegShift32"; + } + + // Note that the operand type is intentionally i64 because the DAGCombiner + // puts these into a canonical form. 
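+  // (Even the 32-bit forms therefore use Operand<i64>: the shift amount reaches
+  // instruction selection as an i64 constant, restricted to 0-31 below.)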
+ def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> { + let ParserMatchClass + = !cast<AsmOperandClass>(prefix # "_asmoperand_i32"); + let PrintMethod = "printShiftOperand<A64SE::" # form # ">"; + let DecoderMethod = "Decode32BitShiftOperand"; + } + + def _asmoperand_i64 : AsmOperandClass { + let Name = "Shift" # form # "i64"; + let RenderMethod = "addShiftOperands"; + let PredicateMethod = "isShift<A64SE::" # form # ", true>"; + let DiagnosticType = "AddSubRegShift64"; + } + + def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> { + let ParserMatchClass + = !cast<AsmOperandClass>(prefix # "_asmoperand_i64"); + let PrintMethod = "printShiftOperand<A64SE::" # form # ">"; + } +} + +defm lsl_operand : shift_operands<"lsl_operand", "LSL">; +defm lsr_operand : shift_operands<"lsr_operand", "LSR">; +defm asr_operand : shift_operands<"asr_operand", "ASR">; + +// Not used for add/sub, but defined here for completeness. The "logical +// (shifted register)" instructions *do* have an ROR variant. +defm ror_operand : shift_operands<"ror_operand", "ROR">; + +//===------------------------------- +// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions. +//===------------------------------- + +// N.b. the commutable parameter is just !N. It will be first against the wall +// when the revolution comes. +multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable, + string asmop, SDPatternOperator opfrag, ValueType ty, + RegisterClass GPR, list<Register> defs> { + let isCommutable = commutable, Defs = defs in { + def _lsl : A64I_addsubshift<sf, op, s, 0b00, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6)) + )], + NoItinerary>; + + def _lsr : A64I_addsubshift<sf, op, s, 0b01, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsr_operand_" # ty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm, + !cast<Operand>("lsr_operand_" # ty):$Imm6)) + )], + NoItinerary>; + + def _asr : A64I_addsubshift<sf, op, s, 0b10, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("asr_operand_" # ty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm, + !cast<Operand>("asr_operand_" # ty):$Imm6)) + )], + NoItinerary>; + } + + def _noshift + : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn, + GPR:$Rm, 0)>; + + def : Pat<(opfrag ty:$Rn, ty:$Rm), + (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>; +} + +multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable, + string asmop, SDPatternOperator opfrag, + list<Register> defs> { + defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s, + commutable, asmop, opfrag, i64, GPR64, defs>; + defm www : addsub_shifts<prefix # "www", 0b0, op, s, + commutable, asmop, opfrag, i32, GPR32, defs>; +} + + +defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>; +defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>; + +defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>; +defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>; + +//===------------------------------- +// 1. 
The NEG/NEGS aliases +//===------------------------------- + +multiclass neg_alias<Instruction INST, RegisterClass GPR, Register ZR, + ValueType ty, Operand shift_operand, SDNode shiftop> { + def : InstAlias<"neg $Rd, $Rm, $Imm6", + (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; + + def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)), + (INST ZR, $Rm, shift_operand:$Imm6)>; +} + +defm : neg_alias<SUBwww_lsl, GPR32, WZR, i32, lsl_operand_i32, shl>; +defm : neg_alias<SUBwww_lsr, GPR32, WZR, i32, lsr_operand_i32, srl>; +defm : neg_alias<SUBwww_asr, GPR32, WZR, i32, asr_operand_i32, sra>; +def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; +def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>; + +defm : neg_alias<SUBxxx_lsl, GPR64, XZR, i64, lsl_operand_i64, shl>; +defm : neg_alias<SUBxxx_lsr, GPR64, XZR, i64, lsr_operand_i64, srl>; +defm : neg_alias<SUBxxx_asr, GPR64, XZR, i64, asr_operand_i64, sra>; +def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; +def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>; + +// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to +// be involved. +class negs_alias<Instruction INST, RegisterClass GPR, + Register ZR, Operand shift_operand, SDNode shiftop> + : InstAlias<"negs $Rd, $Rm, $Imm6", + (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; + +def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>; +def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>; +def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>; +def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; + +def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>; +def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>; +def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>; +def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; + +//===------------------------------- +// 1. 
The CMP/CMN aliases +//===------------------------------- + +multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable, + string asmop, SDPatternOperator opfrag, ValueType ty, + RegisterClass GPR> { + let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in { + def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6), + !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), + [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6)) + )], + NoItinerary>; + + def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsr_operand_" # ty):$Imm6), + !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), + [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm, + !cast<Operand>("lsr_operand_" # ty):$Imm6)) + )], + NoItinerary>; + + def _asr : A64I_addsubshift<sf, op, 0b1, 0b10, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("asr_operand_" # ty):$Imm6), + !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), + [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm, + !cast<Operand>("asr_operand_" # ty):$Imm6)) + )], + NoItinerary>; + } + + def _noshift + : InstAlias<!strconcat(asmop, " $Rn, $Rm"), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; + + def : Pat<(opfrag ty:$Rn, ty:$Rm), + (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>; +} + +defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>; +defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>; + +defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>; +defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>; + +//===----------------------------------------------------------------------===// +// Add-subtract (with carry) instructions +//===----------------------------------------------------------------------===// +// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS + +multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> { + let Uses = [NZCV] in { + def www : A64I_addsubcarry<0b0, op, s, 0b000000, + (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm), + !strconcat(asmop, "\t$Rd, $Rn, $Rm"), + [], NoItinerary>; + + def xxx : A64I_addsubcarry<0b1, op, s, 0b000000, + (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), + !strconcat(asmop, "\t$Rd, $Rn, $Rm"), + [], NoItinerary>; + } +} + +let isCommutable = 1 in { + defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">; +} + +defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">; + +let Defs = [NZCV] in { + let isCommutable = 1 in { + defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">; + } + + defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">; +} + +def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>; +def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>; +def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>; +def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>; + +// Note that adde and sube can form a chain longer than two (e.g. for 256-bit +// addition). So the flag-setting instructions are appropriate. 
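+// For example, an i128 addition is legalized into an addc/adde pair, which
+// these patterns together with the ADDS ones above turn into something like
+// "adds x0, x0, x2" followed by "adcs x1, x1, x3"; a 256-bit addition simply
+// chains further adcs through the carry in NZCV.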
+def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>; +def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>; +def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>; +def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>; + +//===----------------------------------------------------------------------===// +// Bitfield +//===----------------------------------------------------------------------===// +// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL, +// UBFIZ, UBFX + +// Because of the rather complicated nearly-overlapping aliases, the decoding of +// this range of instructions is handled manually. The architectural +// instructions are BFM, SBFM and UBFM but a disassembler should never produce +// these. +// +// In the end, the best option was to use BFM instructions for decoding under +// almost all circumstances, but to create aliasing *Instructions* for each of +// the canonical forms and specify a completely custom decoder which would +// substitute the correct MCInst as needed. +// +// This also simplifies instruction selection, parsing etc because the MCInsts +// have a shape that's closer to their use in code. + +//===------------------------------- +// 1. The architectural BFM instructions +//===------------------------------- + +def uimm5_asmoperand : AsmOperandClass { + let Name = "UImm5"; + let PredicateMethod = "isUImm<5>"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm5"; +} + +def uimm6_asmoperand : AsmOperandClass { + let Name = "UImm6"; + let PredicateMethod = "isUImm<6>"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm6"; +} + +def bitfield32_imm : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> { + let ParserMatchClass = uimm5_asmoperand; + + let DecoderMethod = "DecodeBitfield32ImmOperand"; +} + + +def bitfield64_imm : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> { + let ParserMatchClass = uimm6_asmoperand; + + // Default decoder works in 64-bit case: the 6-bit field can take any value. +} + +multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> { + def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), + (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> { + let DecoderMethod = "DecodeBitfieldInstruction"; + } + + def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), + (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> { + let DecoderMethod = "DecodeBitfieldInstruction"; + } +} + +defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">; +defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">; + +// BFM instructions modify the destination register rather than defining it +// completely. +def BFMwwii : + A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), + (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), + "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + let DecoderMethod = "DecodeBitfieldInstruction"; + let Constraints = "$src = $Rd"; +} + +def BFMxxii : + A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), + (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), + "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + let DecoderMethod = "DecodeBitfieldInstruction"; + let Constraints = "$src = $Rd"; +} + + +//===------------------------------- +// 2. 
Extend aliases to 64-bit dest +//===------------------------------- + +// Unfortunately the extensions that end up as 64-bits cannot be handled by an +// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs +// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias is +// not capable of such a map as far as I'm aware + +// Note that these instructions are strictly more specific than the +// BFM ones (in ImmR) so they can handle their own decoding. +class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, ValueType dty, + string asmop, bits<6> imms, dag pattern> + : A64I_bitfield<sf, opc, sf, + (outs GPRDest:$Rd), (ins GPR32:$Rn), + !strconcat(asmop, "\t$Rd, $Rn"), + [(set dty:$Rd, pattern)], NoItinerary> { + let ImmR = 0b000000; + let ImmS = imms; +} + +// Signed extensions +def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7, + (sext_inreg (anyext i32:$Rn), i8)>; +def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7, + (sext_inreg i32:$Rn, i8)>; +def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15, + (sext_inreg (anyext i32:$Rn), i16)>; +def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15, + (sext_inreg i32:$Rn, i16)>; +def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>; + +// Unsigned extensions +def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7, + (and i32:$Rn, 255)>; +def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15, + (and i32:$Rn, 65535)>; + +// The 64-bit unsigned variants are not strictly architectural but recommended +// for consistency. +let isAsmParserOnly = 1 in { + def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7, + (and (anyext i32:$Rn), 255)>; + def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15, + (and (anyext i32:$Rn), 65535)>; +} + +// Extra patterns for when the source register is actually 64-bits +// too. There's no architectural difference here, it's just LLVM +// shinanigans. There's no need for equivalent zero-extension patterns +// because they'll already be caught by logical (immediate) matching. +def : Pat<(sext_inreg i64:$Rn, i8), + (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>; +def : Pat<(sext_inreg i64:$Rn, i16), + (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>; +def : Pat<(sext_inreg i64:$Rn, i32), + (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>; + + +//===------------------------------- +// 3. Aliases for ASR and LSR (the simple shifts) +//===------------------------------- + +// These also handle their own decoding because ImmS being set makes +// them take precedence over BFM. +multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> { + def wwi : A64I_bitfield<0b0, opc, 0b0, + (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), + [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))], + NoItinerary> { + let ImmS = 31; + } + + def xxi : A64I_bitfield<0b1, opc, 0b1, + (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), + [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))], + NoItinerary> { + let ImmS = 63; + } + +} + +defm ASR : A64I_shift<0b00, "asr", sra>; +defm LSR : A64I_shift<0b10, "lsr", srl>; + +//===------------------------------- +// 4. Aliases for LSL +//===------------------------------- + +// Unfortunately LSL and subsequent aliases are much more complicated. We need +// to be able to say certain output instruction fields depend in a complex +// manner on combinations of input assembly fields). 
+// +// MIOperandInfo *might* have been able to do it, but at the cost of +// significantly more C++ code. + +// N.b. contrary to usual practice these operands store the shift rather than +// the machine bits in an MCInst. The complexity overhead of consistency +// outweighed the benefits in this case (custom asmparser, printer and selection +// vs custom encoder). +def bitfield32_lsl_imm : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> { + let ParserMatchClass = uimm5_asmoperand; + let EncoderMethod = "getBitfield32LSLOpValue"; +} + +def bitfield64_lsl_imm : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> { + let ParserMatchClass = uimm6_asmoperand; + let EncoderMethod = "getBitfield64LSLOpValue"; +} + +class A64I_bitfield_lsl<bit sf, RegisterClass GPR, ValueType ty, + Operand operand> + : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm), + "lsl\t$Rd, $Rn, $FullImm", + [(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))], + NoItinerary> { + bits<12> FullImm; + let ImmR = FullImm{5-0}; + let ImmS = FullImm{11-6}; + + // No disassembler allowed because it would overlap with BFM which does the + // actual work. + let isAsmParserOnly = 1; +} + +def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>; +def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>; + +//===------------------------------- +// 5. Aliases for bitfield extract instructions +//===------------------------------- + +def bfx32_width_asmoperand : AsmOperandClass { + let Name = "BFX32Width"; + let PredicateMethod = "isBitfieldWidth<32>"; + let RenderMethod = "addBFXWidthOperands"; + let DiagnosticType = "Width32"; +} + +def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> { + let PrintMethod = "printBFXWidthOperand"; + let ParserMatchClass = bfx32_width_asmoperand; +} + +def bfx64_width_asmoperand : AsmOperandClass { + let Name = "BFX64Width"; + let PredicateMethod = "isBitfieldWidth<64>"; + let RenderMethod = "addBFXWidthOperands"; + let DiagnosticType = "Width64"; +} + +def bfx64_width : Operand<i64> { + let PrintMethod = "printBFXWidthOperand"; + let ParserMatchClass = bfx64_width_asmoperand; +} + + +multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> { + def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), + (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))], + NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + } + + def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), + (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))], + NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + } +} + +defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>; +defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>; + +// Again, variants based on BFM modify Rd so need it as an input too. +def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), + (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), + "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + // As above, no disassembler allowed. 
+ let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), + (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), + "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +// SBFX instructions can do a 1-instruction sign-extension of boolean values. +def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>; +def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>; +def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)), + (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>; + +// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could +// use either 64-bit or 32-bit variant, but 32-bit might be more efficient. +def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31), + sub_32)>; + +//===------------------------------- +// 6. Aliases for bitfield insert instructions +//===------------------------------- + +def bfi32_lsb_asmoperand : AsmOperandClass { + let Name = "BFI32LSB"; + let PredicateMethod = "isUImm<5>"; + let RenderMethod = "addBFILSBOperands<32>"; + let DiagnosticType = "UImm5"; +} + +def bfi32_lsb : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> { + let PrintMethod = "printBFILSBOperand<32>"; + let ParserMatchClass = bfi32_lsb_asmoperand; +} + +def bfi64_lsb_asmoperand : AsmOperandClass { + let Name = "BFI64LSB"; + let PredicateMethod = "isUImm<6>"; + let RenderMethod = "addBFILSBOperands<64>"; + let DiagnosticType = "UImm6"; +} + +def bfi64_lsb : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> { + let PrintMethod = "printBFILSBOperand<64>"; + let ParserMatchClass = bfi64_lsb_asmoperand; +} + +// Width verification is performed during conversion so width operand can be +// shared between 32/64-bit cases. Still needed for the print method though +// because ImmR encodes "width - 1". +def bfi32_width_asmoperand : AsmOperandClass { + let Name = "BFI32Width"; + let PredicateMethod = "isBitfieldWidth<32>"; + let RenderMethod = "addBFIWidthOperands"; + let DiagnosticType = "Width32"; +} + +def bfi32_width : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> { + let PrintMethod = "printBFIWidthOperand"; + let ParserMatchClass = bfi32_width_asmoperand; +} + +def bfi64_width_asmoperand : AsmOperandClass { + let Name = "BFI64Width"; + let PredicateMethod = "isBitfieldWidth<64>"; + let RenderMethod = "addBFIWidthOperands"; + let DiagnosticType = "Width64"; +} + +def bfi64_width : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> { + let PrintMethod = "printBFIWidthOperand"; + let ParserMatchClass = bfi64_width_asmoperand; +} + +multiclass A64I_bitfield_insert<bits<2> opc, string asmop> { + def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), + (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + } + + def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), + (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> { + // As above, no disassembler allowed. 
+ let isAsmParserOnly = 1; + } +} + +defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">; +defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">; + + +def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), + (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), + "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), + (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), + "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +//===----------------------------------------------------------------------===// +// Compare and branch (immediate) +//===----------------------------------------------------------------------===// +// Contains: CBZ, CBNZ + +class label_asmoperand<int width, int scale> : AsmOperandClass { + let Name = "Label" # width # "_" # scale; + let PredicateMethod = "isLabel<" # width # "," # scale # ">"; + let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">"; + let DiagnosticType = "Label"; +} + +def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>; + +// All conditional immediate branches are the same really: 19 signed bits scaled +// by the instruction-size (4). +def bcc_target : Operand<OtherVT> { + // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. + let ParserMatchClass = label_wid19_scal4_asmoperand; + let PrintMethod = "printLabelOperand<19, 4>"; + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>"; + let OperandType = "OPERAND_PCREL"; +} + +multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> { + let isBranch = 1, isTerminator = 1 in { + def x : A64I_cmpbr<0b1, op, + (outs), + (ins GPR64:$Rt, bcc_target:$Label), + !strconcat(asmop,"\t$Rt, $Label"), + [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)], + NoItinerary>; + + def w : A64I_cmpbr<0b0, op, + (outs), + (ins GPR32:$Rt, bcc_target:$Label), + !strconcat(asmop,"\t$Rt, $Label"), + [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)], + NoItinerary>; + } +} + +defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{ + return Imm == A64CC::EQ; +}]> >; +defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{ + return Imm == A64CC::NE; +}]> >; + +//===----------------------------------------------------------------------===// +// Conditional branch (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: B.cc + +def cond_code_asmoperand : AsmOperandClass { + let Name = "CondCode"; + let DiagnosticType = "CondCode"; +} + +def cond_code : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm <= 15; +}]> { + let PrintMethod = "printCondCodeOperand"; + let ParserMatchClass = cond_code_asmoperand; +} + +def Bcc : A64I_condbr<0b0, 0b0, (outs), + (ins cond_code:$Cond, bcc_target:$Label), + "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)], + NoItinerary> { + let Uses = [NZCV]; + let isBranch = 1; + let isTerminator = 1; +} + +//===----------------------------------------------------------------------===// +// Conditional compare (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: CCMN, CCMP + +def uimm4_asmoperand : AsmOperandClass { + let Name = "UImm4"; + let PredicateMethod = "isUImm<4>"; + let RenderMethod = "addImmOperands"; + let 
DiagnosticType = "UImm4"; +} + +def uimm4 : Operand<i32> { + let ParserMatchClass = uimm4_asmoperand; +} + +def uimm5 : Operand<i32> { + let ParserMatchClass = uimm5_asmoperand; +} + +// The only difference between this operand and the one for instructions like +// B.cc is that it's parsed manually. The other get parsed implicitly as part of +// the mnemonic handling. +def cond_code_op_asmoperand : AsmOperandClass { + let Name = "CondCodeOp"; + let RenderMethod = "addCondCodeOperands"; + let PredicateMethod = "isCondCode"; + let ParserMethod = "ParseCondCodeOperand"; + let DiagnosticType = "CondCode"; +} + +def cond_code_op : Operand<i32> { + let PrintMethod = "printCondCodeOperand"; + let ParserMatchClass = cond_code_op_asmoperand; +} + +class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop> + : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs), + (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond), + !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"), + [], NoItinerary> { + let Defs = [NZCV]; +} + +def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">; +def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">; +def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">; +def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">; + +//===----------------------------------------------------------------------===// +// Conditional compare (register) instructions +//===----------------------------------------------------------------------===// +// Contains: CCMN, CCMP + +class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop> + : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1, + (outs), + (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), + !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), + [], NoItinerary> { + let Defs = [NZCV]; +} + +def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">; +def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">; +def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">; +def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">; + +//===----------------------------------------------------------------------===// +// Conditional select instructions +//===----------------------------------------------------------------------===// +// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG + +// Condition code which is encoded as the inversion (semantically rather than +// bitwise) in the instruction. +def inv_cond_code_op_asmoperand : AsmOperandClass { + let Name = "InvCondCodeOp"; + let RenderMethod = "addInvCondCodeOperands"; + let PredicateMethod = "isCondCode"; + let ParserMethod = "ParseCondCodeOperand"; + let DiagnosticType = "CondCode"; +} + +def inv_cond_code_op : Operand<i32> { + let ParserMatchClass = inv_cond_code_op_asmoperand; +} + +// Having a separate operand for the selectable use-case is debatable, but gives +// consistency with cond_code. 
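+// E.g. the "cset" alias further down renders its written condition through
+// addInvCondCodeOperands, so "cset w0, eq" becomes (CSINCwwwc w0, wzr, wzr, ne);
+// inv_cond_XFORM below performs the same inversion on the ISel side for the
+// select patterns.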
+def inv_cond_XFORM : SDNodeXForm<imm, [{ + A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue()); + return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32); +}]>; + +def inv_cond_code + : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>; + + +multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop, + SDPatternOperator select> { + let Uses = [NZCV] in { + def wwwc : A64I_condsel<0b0, op, 0b0, op2, + (outs GPR32:$Rd), + (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), + [(set i32:$Rd, (select i32:$Rn, i32:$Rm))], + NoItinerary>; + + + def xxxc : A64I_condsel<0b1, op, 0b0, op2, + (outs GPR64:$Rd), + (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), + [(set i64:$Rd, (select i64:$Rn, i64:$Rm))], + NoItinerary>; + } +} + +def simple_select + : PatFrag<(ops node:$lhs, node:$rhs), + (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>; + +class complex_select<SDPatternOperator opnode> + : PatFrag<(ops node:$lhs, node:$rhs), + (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>; + + +defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>; +defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc", + complex_select<PatFrag<(ops node:$val), + (add node:$val, 1)>>>; +defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>; +defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>; + +// Now the instruction aliases, which fit nicely into LLVM's model: + +def : InstAlias<"cset $Rd, $Cond", + (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"cset $Rd, $Cond", + (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"csetm $Rd, $Cond", + (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"csetm $Rd, $Cond", + (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinc $Rd, $Rn, $Cond", + (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinc $Rd, $Rn, $Cond", + (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinv $Rd, $Rn, $Cond", + (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinv $Rd, $Rn, $Cond", + (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cneg $Rd, $Rn, $Cond", + (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cneg $Rd, $Rn, $Cond", + (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; + +// Finally some helper patterns. + +// For CSET (a.k.a. zero-extension of icmp) +def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond), + (CSINCwwwc WZR, WZR, cond_code:$Cond)>; +def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond), + (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>; + +def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond), + (CSINCxxxc XZR, XZR, cond_code:$Cond)>; +def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond), + (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>; + +// For CSETM (a.k.a. 
sign-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+          (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+          (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+          (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+          (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
+// commutativity. The instructions are too complex for isCommutable to be used,
+// so we have to create the patterns manually:
+
+// No commutable pattern for CSEL since the commuted version is isomorphic.
+
+// CSINC
+def : Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
+          (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
+          (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+// CSINV
+def : Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
+          (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
+          (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+// CSNEG
+def : Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
+          (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
+          (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (1 source) instructions
+//===----------------------------------------------------------------------===//
+// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
+
+// We define a unary operator which always fails. We will use this to
+// define unary operators that cannot be matched.
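+// (As a sketch of that idiom: a fragment such as
+//    PatFrag<(ops node:$val), (fneg node:$val), [{ (void)N; return false; }]>
+//  can never match, so an instruction given it as its default pattern ends up
+//  with no usable ISel pattern at all; FPNoUnop and FPNoBinop further down
+//  take exactly this form.)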
+ +class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop, + list<dag> patterns, RegisterClass GPRrc, + InstrItinClass itin>: + A64I_dp_1src<sf, + 0, + 0b00000, + opcode, + !strconcat(asmop, "\t$Rd, $Rn"), + (outs GPRrc:$Rd), + (ins GPRrc:$Rn), + patterns, + itin>; + +multiclass A64I_dp_1src <bits<6> opcode, string asmop> { + let hasSideEffects = 0 in { + def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>; + def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>; + } +} + +defm RBIT : A64I_dp_1src<0b000000, "rbit">; +defm CLS : A64I_dp_1src<0b000101, "cls">; +defm CLZ : A64I_dp_1src<0b000100, "clz">; + +def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>; +def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>; +def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>; +def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>; + +def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>; +def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>; +def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>; +def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>; + + +def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev", + [(set i32:$Rd, (bswap i32:$Rn))], + GPR32, NoItinerary>; +def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev", + [(set i64:$Rd, (bswap i64:$Rn))], + GPR64, NoItinerary>; +def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32", + [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))], + GPR64, NoItinerary>; +def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16", + [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))], + GPR32, + NoItinerary>; +def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>; + +//===----------------------------------------------------------------------===// +// Data Processing (2 sources) instructions +//===----------------------------------------------------------------------===// +// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL, +// LSR, ASR, ROR + + +class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns, + RegisterClass GPRsp, + InstrItinClass itin>: + A64I_dp_2src<sf, + opcode, + 0, + !strconcat(asmop, "\t$Rd, $Rn, $Rm"), + (outs GPRsp:$Rd), + (ins GPRsp:$Rn, GPRsp:$Rm), + patterns, + itin>; + +multiclass dp_2src_crc<bit c, string asmop> { + def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0}, + !strconcat(asmop, "b"), [], GPR32, NoItinerary>; + def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1}, + !strconcat(asmop, "h"), [], GPR32, NoItinerary>; + def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0}, + !strconcat(asmop, "w"), [], GPR32, NoItinerary>; + def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0, + !strconcat(asmop, "x\t$Rd, $Rn, $Rm"), + (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [], + NoItinerary>; +} + +multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> { + def www : dp_2src_impl<0b0, + opcode, + asmop, + [(set i32:$Rd, + (op i32:$Rn, (i64 (zext i32:$Rm))))], + GPR32, + NoItinerary>; + def xxx : dp_2src_impl<0b1, + opcode, + asmop, + [(set i64:$Rd, (op i64:$Rn, i64:$Rm))], + GPR64, + NoItinerary>; +} + + +multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> { + def www : dp_2src_impl<0b0, + opcode, + asmop, + [(set i32:$Rd, (op i32:$Rn, i32:$Rm))], + GPR32, + NoItinerary>; + def xxx : dp_2src_impl<0b1, + opcode, + asmop, + [(set i64:$Rd, (op i64:$Rn, i64:$Rm))], + GPR64, + NoItinerary>; +} + +// Here we define the data processing 2 source instructions. 
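+// N.b. the 32-bit shift variants below are given patterns for an amount of the
+// form (i64 (zext i32:$Rm)) (hence dp_2src_zext above), while a raw i64 amount
+// is picked up by the EXTRACT_SUBREG patterns that follow the defms.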
+defm CRC32 : dp_2src_crc<0b0, "crc32">; +defm CRC32C : dp_2src_crc<0b1, "crc32c">; + +defm UDIV : dp_2src<0b000010, "udiv", udiv>; +defm SDIV : dp_2src<0b000011, "sdiv", sdiv>; + +defm LSLV : dp_2src_zext<0b001000, "lsl", shl>; +defm LSRV : dp_2src_zext<0b001001, "lsr", srl>; +defm ASRV : dp_2src_zext<0b001010, "asr", sra>; +defm RORV : dp_2src_zext<0b001011, "ror", rotr>; + +// Extra patterns for an incoming 64-bit value for a 32-bit +// operation. Since the LLVM operations are undefined (as in C) if the +// RHS is out of range, it's perfectly permissible to discard the high +// bits of the GPR64. +def : Pat<(shl i32:$Rn, i64:$Rm), + (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; +def : Pat<(srl i32:$Rn, i64:$Rm), + (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; +def : Pat<(sra i32:$Rn, i64:$Rm), + (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; +def : Pat<(rotr i32:$Rn, i64:$Rm), + (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>; + +// Here we define the aliases for the data processing 2 source instructions. +def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">; +def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">; +def ASR_menmonic : MnemonicAlias<"asrv", "asr">; +def ROR_menmonic : MnemonicAlias<"rorv", "ror">; + +//===----------------------------------------------------------------------===// +// Data Processing (3 sources) instructions +//===----------------------------------------------------------------------===// +// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH +// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL + +class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg, + ValueType AccTy, RegisterClass SrcReg, + string asmop, dag pattern> + : A64I_dp3<sf, opcode, + (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"), + [(set AccTy:$Rd, pattern)], NoItinerary> { + RegisterClass AccGPR = AccReg; + RegisterClass SrcGPR = SrcReg; +} + +def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd", + (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>; +def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd", + (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>; + +def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub", + (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>; +def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub", + (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>; + +def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl", + (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>; +def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl", + (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>; + +def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl", + (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>; +def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl", + (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>; + +let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in { + def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd), + (ins GPR64:$Rn, GPR64:$Rm), + "umulh\t$Rd, $Rn, $Rm", + [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))], + NoItinerary>; + + def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd), + (ins GPR64:$Rn, GPR64:$Rm), + "smulh\t$Rd, $Rn, $Rm", + [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))], + NoItinerary>; +} + +multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST, + Register ZR, dag pattern> { + def : InstAlias<asmop # " $Rd, $Rn, 
$Rm", + (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>; + + def : Pat<pattern, (INST $Rn, $Rm, ZR)>; +} + +defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>; +defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>; + +defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR, + (sub 0, (mul i32:$Rn, i32:$Rm))>; +defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR, + (sub 0, (mul i64:$Rn, i64:$Rm))>; + +defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR, + (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>; +defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR, + (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>; + +defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR, + (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>; +defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR, + (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>; + + +//===----------------------------------------------------------------------===// +// Exception generation +//===----------------------------------------------------------------------===// +// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3 + +def uimm16_asmoperand : AsmOperandClass { + let Name = "UImm16"; + let PredicateMethod = "isUImm<16>"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm16"; +} + +def uimm16 : Operand<i32> { + let ParserMatchClass = uimm16_asmoperand; +} + +class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop> + : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16), + !strconcat(asmop, "\t$UImm16"), [], NoItinerary> { + let isBranch = 1; + let isTerminator = 1; +} + +def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">; +def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">; +def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">; +def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">; +def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">; + +def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">; +def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">; +def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">; + +// The immediate is optional for the DCPS instructions, defaulting to 0. 
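+// E.g. a bare "dcps2" assembles to exactly the same encoding as "dcps2 #0";
+// the other exception-generating instructions defined above still require an
+// explicit #imm16.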
+def : InstAlias<"dcps1", (DCPS1i 0)>; +def : InstAlias<"dcps2", (DCPS2i 0)>; +def : InstAlias<"dcps3", (DCPS3i 0)>; + +//===----------------------------------------------------------------------===// +// Extract (immediate) +//===----------------------------------------------------------------------===// +// Contains: EXTR + alias ROR + +def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0, + (outs GPR32:$Rd), + (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB), + "extr\t$Rd, $Rn, $Rm, $LSB", + [(set i32:$Rd, + (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))], + NoItinerary>; +def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1, + (outs GPR64:$Rd), + (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB), + "extr\t$Rd, $Rn, $Rm, $LSB", + [(set i64:$Rd, + (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))], + NoItinerary>; + +def : InstAlias<"ror $Rd, $Rs, $LSB", + (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>; +def : InstAlias<"ror $Rd, $Rs, $LSB", + (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>; + +def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB), + (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>; +def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB), + (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>; + +//===----------------------------------------------------------------------===// +// Floating-point compare instructions +//===----------------------------------------------------------------------===// +// Contains: FCMP, FCMPE + +def fpzero_asmoperand : AsmOperandClass { + let Name = "FPZero"; + let ParserMethod = "ParseFPImmOperand"; + let DiagnosticType = "FPZero"; +} + +def fpz32 : Operand<f32>, + ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> { + let ParserMatchClass = fpzero_asmoperand; + let PrintMethod = "printFPZeroOperand"; + let DecoderMethod = "DecodeFPZeroOperand"; +} + +def fpz64 : Operand<f64>, + ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> { + let ParserMatchClass = fpzero_asmoperand; + let PrintMethod = "printFPZeroOperand"; + let DecoderMethod = "DecodeFPZeroOperand"; +} + +multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> { + def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0}, + (outs), ins, "fcmp\t$Rn, $Rm", [pattern], + NoItinerary> { + let Defs = [NZCV]; + } + + def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0}, + (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> { + let Defs = [NZCV]; + } +} + +defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), + (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>; +defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), + (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>; + +// What would be Rm should be written as 0; note that even though it's called +// "$Rm" here to fit in with the InstrFormats, it's actually an immediate. 
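+// E.g. "fcmp s0, #0.0" is parsed through ParseFPImmOperand into the fpz32
+// operand and selected for comparisons of an f32 against +0.0, while the five
+// Rm bits of the encoding stay zero.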
+defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm), + (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>; + +defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm), + (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>; + + +//===----------------------------------------------------------------------===// +// Floating-point conditional compare instructions +//===----------------------------------------------------------------------===// +// Contains: FCCMP, FCCMPE + +class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop> + : A64I_fpccmp<0b0, 0b0, type, op, + (outs), + (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), + !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), + [], NoItinerary> { + let Defs = [NZCV]; +} + +def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">; +def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">; +def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">; +def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">; + +//===----------------------------------------------------------------------===// +// Floating-point conditional select instructions +//===----------------------------------------------------------------------===// +// Contains: FCSEL + +let Uses = [NZCV] in { + def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd), + (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond), + "fcsel\t$Rd, $Rn, $Rm, $Cond", + [(set f32:$Rd, + (simple_select f32:$Rn, f32:$Rm))], + NoItinerary>; + + + def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd), + (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond), + "fcsel\t$Rd, $Rn, $Rm, $Cond", + [(set f64:$Rd, + (simple_select f64:$Rn, f64:$Rm))], + NoItinerary>; +} + +//===----------------------------------------------------------------------===// +// Floating-point data-processing (1 source) +//===----------------------------------------------------------------------===// +// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI]. + +def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val), + [{ (void)N; return false; }]>; + +// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d" +// syntax. Default to no pattern because most are odd enough not to have one. +multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr, + SDPatternOperator opnode = FPNoUnop> { + def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn), + !strconcat(asmstr, "\t$Rd, $Rn"), + [(set f32:$Rd, (opnode f32:$Rn))], + NoItinerary>; + + def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn), + !strconcat(asmstr, "\t$Rd, $Rn"), + [(set f64:$Rd, (opnode f64:$Rn))], + NoItinerary>; +} + +defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">; +defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>; +defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>; +defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>; + +defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">; +defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>; +defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>; +defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>; +defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">; +defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>; +defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>; + +// The FCVT instrucitons have different source and destination register-types, +// but the fields are uniform everywhere a D-register (say) crops up. Package +// this information in a Record. 
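+// E.g. FCVTds further down pairs FCVT64 (destination) with FCVT32 (source) and
+// fextend: the type field comes from the source, the destination width goes
+// into the low opcode bits, and the pattern is simply
+// (set f64:$Rd, (fextend f32:$Rn)).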
+class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> { + RegisterClass Class = rc; + ValueType VT = vt; + bit t1 = fld{1}; + bit t0 = fld{0}; +} + +def FCVT16 : FCVTRegType<FPR16, 0b11, f16>; +def FCVT32 : FCVTRegType<FPR32, 0b00, f32>; +def FCVT64 : FCVTRegType<FPR64, 0b01, f64>; + +class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode> + : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0}, + {0,0,0,1, DestReg.t1, DestReg.t0}, + (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn), + "fcvt\t$Rd, $Rn", + [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>; + +def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>; +def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>; +def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>; +def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>; +def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>; +def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>; + + +//===----------------------------------------------------------------------===// +// Floating-point data-processing (2 sources) instructions +//===----------------------------------------------------------------------===// +// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL + +def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs), + [{ (void)N; return false; }]>; + +multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr, + SDPatternOperator opnode> { + def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode, + (outs FPR32:$Rd), + (ins FPR32:$Rn, FPR32:$Rm), + !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), + [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))], + NoItinerary>; + + def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode, + (outs FPR64:$Rd), + (ins FPR64:$Rn, FPR64:$Rm), + !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), + [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))], + NoItinerary>; +} + +let isCommutable = 1 in { + defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>; + defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>; + + // No patterns for these. 
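+  // (FPNoBinop above has an always-false predicate, so these four end up with
+  //  patterns that can never match; within this file they are effectively
+  //  assembler-only.)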
+ defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>; + defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>; + defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>; + defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>; + + defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul", + PatFrag<(ops node:$lhs, node:$rhs), + (fneg (fmul node:$lhs, node:$rhs))> >; +} + +defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>; +defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>; + +//===----------------------------------------------------------------------===// +// Floating-point data-processing (3 sources) instructions +//===----------------------------------------------------------------------===// +// Contains: FMADD, FMSUB, FNMADD, FNMSUB + +def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), + (fma (fneg node:$Rn), node:$Rm, node:$Ra)>; +def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), + (fma node:$Rn, node:$Rm, (fneg node:$Ra))>; +def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), + (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>; + +class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT, + bits<2> type, bit o1, bit o0, SDPatternOperator fmakind> + : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd), + (ins FPR:$Rn, FPR:$Rm, FPR:$Ra), + !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"), + [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))], + NoItinerary>; + +def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>; +def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>; +def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>; +def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>; + +def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>; +def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>; +def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>; +def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; + +//===----------------------------------------------------------------------===// +// Floating-point <-> fixed-point conversion instructions +//===----------------------------------------------------------------------===// +// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF + +// #1-#32 allowed, encoded as "64 - <specified imm> +def fixedpos_asmoperand_i32 : AsmOperandClass { + let Name = "CVTFixedPos32"; + let RenderMethod = "addCVTFixedPosOperands"; + let PredicateMethod = "isCVTFixedPos<32>"; + let DiagnosticType = "CVTFixedPos32"; +} + +// Also encoded as "64 - <specified imm>" but #1-#64 allowed. 
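+// E.g. an assembly scale of #16 is stored as 64 - 16 = 48 in the instruction's
+// scale field; the CVTFixedPos render and print helpers presumably apply that
+// mapping in each direction.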
+def fixedpos_asmoperand_i64 : AsmOperandClass { + let Name = "CVTFixedPos64"; + let RenderMethod = "addCVTFixedPosOperands"; + let PredicateMethod = "isCVTFixedPos<64>"; + let DiagnosticType = "CVTFixedPos64"; +} + +// We need the cartesian product of f32/f64 i32/i64 operands for +// conversions: +// + Selection needs to use operands of correct floating type +// + Assembly parsing and decoding depend on integer width +class cvtfix_i32_op<ValueType FloatVT> + : Operand<FloatVT>, + ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> { + let ParserMatchClass = fixedpos_asmoperand_i32; + let DecoderMethod = "DecodeCVT32FixedPosOperand"; + let PrintMethod = "printCVTFixedPosOperand"; +} + +class cvtfix_i64_op<ValueType FloatVT> + : Operand<FloatVT>, + ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> { + let ParserMatchClass = fixedpos_asmoperand_i64; + let PrintMethod = "printCVTFixedPosOperand"; +} + +// Because of the proliferation of weird operands, it's not really +// worth going for a multiclass here. Oh well. + +class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode, + RegisterClass GPR, RegisterClass FPR, + ValueType DstTy, ValueType SrcTy, + Operand scale_op, string asmop, SDNode cvtop> + : A64I_fpfixed<sf, 0b0, type, 0b11, opcode, + (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale), + !strconcat(asmop, "\t$Rd, $Rn, $Scale"), + [(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))], + NoItinerary>; + +def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32, + cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>; +def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32, + cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>; +def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32, + cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>; +def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32, + cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>; + +def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64, + cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>; +def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64, + cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>; +def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64, + cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>; +def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64, + cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>; + + +class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode, + RegisterClass FPR, RegisterClass GPR, + ValueType DstTy, ValueType SrcTy, + Operand scale_op, string asmop, SDNode cvtop> + : A64I_fpfixed<sf, 0b0, type, 0b00, opcode, + (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale), + !strconcat(asmop, "\t$Rd, $Rn, $Scale"), + [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))], + NoItinerary>; + +def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32, + cvtfix_i32_op<f32>, "scvtf", sint_to_fp>; +def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64, + cvtfix_i64_op<f32>, "scvtf", sint_to_fp>; +def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32, + cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>; +def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64, + cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>; +def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32, + cvtfix_i32_op<f64>, "scvtf", sint_to_fp>; +def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64, + cvtfix_i64_op<f64>, "scvtf", sint_to_fp>; +def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, 
FPR64, GPR32, f64, i32, + cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>; +def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64, + cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>; + +//===----------------------------------------------------------------------===// +// Floating-point <-> integer conversion instructions +//===----------------------------------------------------------------------===// +// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF + +class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode, + RegisterClass DestPR, RegisterClass SrcPR, string asmop> + : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn), + !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>; + +multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> { + def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0}, + GPR32, FPR32, asmop # "s">; + def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0}, + GPR64, FPR32, asmop # "s">; + def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1}, + GPR32, FPR32, asmop # "u">; + def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1}, + GPR64, FPR32, asmop # "u">; + + def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0}, + GPR32, FPR64, asmop # "s">; + def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0}, + GPR64, FPR64, asmop # "s">; + def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1}, + GPR32, FPR64, asmop # "u">; + def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1}, + GPR64, FPR64, asmop # "u">; +} + +defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">; +defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">; +defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">; +defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">; +defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">; + +def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>; +def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>; +def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>; +def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>; +def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>; +def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>; +def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>; +def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>; + +multiclass A64I_inttofp<bit o0, string asmop> { + def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>; + def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>; + def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>; + def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>; +} + +defm S : A64I_inttofp<0b0, "scvtf">; +defm U : A64I_inttofp<0b1, "ucvtf">; + +def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>; +def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>; +def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>; +def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>; +def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>; +def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>; +def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>; +def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>; + +def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">; +def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">; +def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">; +def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">; + +def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>; +def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>; +def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>; +def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx 
$Rn)>; + +def lane1_asmoperand : AsmOperandClass { + let Name = "Lane1"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "Lane1"; +} + +def lane1 : Operand<i32> { + let ParserMatchClass = lane1_asmoperand; + let PrintMethod = "printBareImmOperand"; +} + +let DecoderMethod = "DecodeFMOVLaneInstruction" in { + def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110, + (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane), + "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>; + + def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111, + (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane), + "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>; +} + +def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]", + (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>; + +def : InstAlias<"fmov $Rd.2d[$Lane], $Rn", + (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>; + +//===----------------------------------------------------------------------===// +// Floating-point immediate instructions +//===----------------------------------------------------------------------===// +// Contains: FMOV + +def fpimm_asmoperand : AsmOperandClass { + let Name = "FMOVImm"; + let ParserMethod = "ParseFPImmOperand"; + let DiagnosticType = "FPImm"; +} + +// The MCOperand for these instructions are the encoded 8-bit values. +def SDXF_fpimm : SDNodeXForm<fpimm, [{ + uint32_t Imm8; + A64Imms::isFPImm(N->getValueAPF(), Imm8); + return CurDAG->getTargetConstant(Imm8, MVT::i32); +}]>; + +class fmov_operand<ValueType FT> + : Operand<i32>, + PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }], + SDXF_fpimm> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = fpimm_asmoperand; +} + +def fmov32_operand : fmov_operand<f32>; +def fmov64_operand : fmov_operand<f64>; + +class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT, + Operand fmov_operand> + : A64I_fpimm<0b0, 0b0, type, 0b00000, + (outs Reg:$Rd), + (ins fmov_operand:$Imm8), + "fmov\t$Rd, $Imm8", + [(set VT:$Rd, fmov_operand:$Imm8)], + NoItinerary>; + +def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>; +def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>; + +//===----------------------------------------------------------------------===// +// Load-register (literal) instructions +//===----------------------------------------------------------------------===// +// Contains: LDR, LDRSW, PRFM + +def ldrlit_label_asmoperand : AsmOperandClass { + let Name = "LoadLitLabel"; + let RenderMethod = "addLabelOperands<19, 4>"; + let DiagnosticType = "Label"; +} + +def ldrlit_label : Operand<i64> { + let EncoderMethod = "getLoadLitLabelOpValue"; + + // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. + let PrintMethod = "printLabelOperand<19, 4>"; + let ParserMatchClass = ldrlit_label_asmoperand; + let OperandType = "OPERAND_PCREL"; +} + +// Various instructions take an immediate value (which can always be used), +// where some numbers have a symbolic name to make things easier. These operands +// and the associated functions abstract away the differences. 
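Editorial note: the namedimm multiclass that follows builds operands whose assembly form is a symbolic name backed by a plain immediate, with prefetch operations as the first user. A rough C sketch of the kind of name-to-value table such a mapper wraps; the table and function names are invented here (the real one is A64PRFM::PRFMMapper), and only a few of the architectural prfop values are shown:

    #include <stdbool.h>
    #include <string.h>

    struct NamedImm { const char *Name; unsigned Value; };

    /* A few prfop encodings: type(2 bits) : target cache(2 bits) : policy(1 bit). */
    static const struct NamedImm PrefetchOps[] = {
      { "pldl1keep", 0x00 }, { "pldl1strm", 0x01 },
      { "pldl2keep", 0x02 }, { "pldl3keep", 0x04 },
      { "pstl1keep", 0x10 }, { "pstl1strm", 0x11 },
    };

    /* Parser side: map a symbolic name onto its immediate, if it has one. */
    static bool prefetchOpFromName(const char *Name, unsigned *Val) {
      for (unsigned i = 0; i < sizeof(PrefetchOps) / sizeof(PrefetchOps[0]); ++i)
        if (strcmp(PrefetchOps[i].Name, Name) == 0) {
          *Val = PrefetchOps[i].Value;
          return true;
        }
      return false;   /* otherwise a bare immediate in range is still accepted */
    }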
+multiclass namedimm<string prefix, string mapper> { + def _asmoperand : AsmOperandClass { + let Name = "NamedImm" # prefix; + let PredicateMethod = "isUImm"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "ParseNamedImmOperand<" # mapper # ">"; + let DiagnosticType = "NamedImm_" # prefix; + } + + def _op : Operand<i32> { + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); + let PrintMethod = "printNamedImmOperand<" # mapper # ">"; + let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">"; + } +} + +defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">; + +class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg, + list<dag> patterns = []> + : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19), + "ldr\t$Rt, $Imm19", patterns, NoItinerary>; + +let mayLoad = 1 in { + def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>; + def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>; +} + +def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>; +def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>; + +let mayLoad = 1 in { + def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>; + + + def LDRSWx_lit : A64I_LDRlit<0b10, 0b0, + (outs GPR64:$Rt), + (ins ldrlit_label:$Imm19), + "ldrsw\t$Rt, $Imm19", + [], NoItinerary>; + + def PRFM_lit : A64I_LDRlit<0b11, 0b0, + (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19), + "prfm\t$Rt, $Imm19", + [], NoItinerary>; +} + +//===----------------------------------------------------------------------===// +// Load-store exclusive instructions +//===----------------------------------------------------------------------===// +// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR. STXP, LDXP, STLXRB, +// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB, +// STLRH, STLR, LDARB, LDARH, LDAR + +// Since these instructions have the undefined register bits set to 1 in +// their canonical form, we need a post encoder method to set those bits +// to 1 when encoding these instructions. We do this using the +// fixLoadStoreExclusive function. This function has template parameters: +// +// fixLoadStoreExclusive<int hasRs, int hasRt2> +// +// hasRs indicates that the instruction uses the Rs field, so we won't set +// it to 1 (and the same for Rt2). We don't need template parameters for +// the other register fiels since Rt and Rn are always used. + +// This operand parses a GPR64xsp register, followed by an optional immediate +// #0. 
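Editorial note: before the address operand below, here is a small C sketch of what a post-encoder like fixLoadStoreExclusive<hasRs, hasRt2>, described in the comment above, amounts to. It assumes the usual A64 load/store-exclusive field placement (Rs in bits [20:16], Rt2 in bits [14:10]); the function is illustrative, not the backend's actual code:

    #include <stdint.h>

    /* Force the unused Rs/Rt2 fields to all-ones, as the canonical
     * encodings of these instructions require. */
    static uint32_t fixLoadStoreExclusive(uint32_t insn, int hasRs, int hasRt2) {
      if (!hasRs)
        insn |= 0x1Fu << 16;   /* Rs  <- 0b11111 */
      if (!hasRt2)
        insn |= 0x1Fu << 10;   /* Rt2 <- 0b11111 */
      return insn;
    }

For LDXR/LDAR/STLR-style forms neither field carries a register, which is why the <0,0> instantiations appear later in this section.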
+def GPR64xsp0_asmoperand : AsmOperandClass { + let Name = "GPR64xsp0"; + let PredicateMethod = "isWrappedReg"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "ParseLSXAddressOperand"; + // Diagnostics are provided by ParserMethod +} + +def GPR64xsp0 : RegisterOperand<GPR64xsp> { + let ParserMatchClass = GPR64xsp0_asmoperand; +} + +//===---------------------------------- +// Store-exclusive (releasing & normal) +//===---------------------------------- + +class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, + dag ins, list<dag> pat, + InstrItinClass itin> : + A64I_LDSTex_stn <size, + opcode{2}, 0, opcode{1}, opcode{0}, + outs, ins, + !strconcat(asm, "\t$Rs, $Rt, [$Rn]"), + pat, itin> { + let mayStore = 1; + let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; +} + +multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> { + def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), + (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [], NoItinerary>; + + def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), + (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [],NoItinerary>; + + def _word: A64I_SRexs_impl<0b10, opcode, asmstr, + (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [], NoItinerary>; + + def _dword: A64I_SRexs_impl<0b11, opcode, asmstr, + (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn), + [], NoItinerary>; +} + +defm STXR : A64I_SRex<"stxr", 0b000, "STXR">; +defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">; + +//===---------------------------------- +// Loads +//===---------------------------------- + +class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, + dag ins, list<dag> pat, + InstrItinClass itin> : + A64I_LDSTex_tn <size, + opcode{2}, 1, opcode{1}, opcode{0}, + outs, ins, + !strconcat(asm, "\t$Rt, [$Rn]"), + pat, itin> { + let mayLoad = 1; + let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; +} + +multiclass A64I_LRex<string asmstr, bits<3> opcode> { + def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), + (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), + (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _word: A64I_LRexs_impl<0b10, opcode, asmstr, + (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _dword: A64I_LRexs_impl<0b11, opcode, asmstr, + (outs GPR64:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; +} + +defm LDXR : A64I_LRex<"ldxr", 0b000>; +defm LDAXR : A64I_LRex<"ldaxr", 0b001>; +defm LDAR : A64I_LRex<"ldar", 0b101>; + +class acquiring_load<PatFrag base> + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + return cast<AtomicSDNode>(N)->getOrdering() == Acquire; +}]>; + +def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; +def atomic_load_acquire_16 : acquiring_load<atomic_load_16>; +def atomic_load_acquire_32 : acquiring_load<atomic_load_32>; +def atomic_load_acquire_64 : acquiring_load<atomic_load_64>; + +def : Pat<(atomic_load_acquire_8 i64:$Rn), (LDAR_byte $Rn)>; +def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>; +def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word $Rn)>; +def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>; + +//===---------------------------------- +// Store-release (no exclusivity) +//===---------------------------------- + +class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, + dag ins, list<dag> pat, + InstrItinClass itin> : + A64I_LDSTex_tn <size, + opcode{2}, 0, 
opcode{1}, opcode{0}, + outs, ins, + !strconcat(asm, "\t$Rt, [$Rn]"), + pat, itin> { + let mayStore = 1; + let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; +} + +class releasing_store<PatFrag base> + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + return cast<AtomicSDNode>(N)->getOrdering() == Release; +}]>; + +def atomic_store_release_8 : releasing_store<atomic_store_8>; +def atomic_store_release_16 : releasing_store<atomic_store_16>; +def atomic_store_release_32 : releasing_store<atomic_store_32>; +def atomic_store_release_64 : releasing_store<atomic_store_64>; + +multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> { + def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"), + (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [(atomic_store_release_8 i64:$Rn, i32:$Rt)], + NoItinerary>; + + def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"), + (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [(atomic_store_release_16 i64:$Rn, i32:$Rt)], + NoItinerary>; + + def _word: A64I_SLexs_impl<0b10, opcode, asmstr, + (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [(atomic_store_release_32 i64:$Rn, i32:$Rt)], + NoItinerary>; + + def _dword: A64I_SLexs_impl<0b11, opcode, asmstr, + (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn), + [(atomic_store_release_64 i64:$Rn, i64:$Rt)], + NoItinerary>; +} + +defm STLR : A64I_SLex<"stlr", 0b101, "STLR">; + +//===---------------------------------- +// Store-exclusive pair (releasing & normal) +//===---------------------------------- + +class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, + dag ins, list<dag> pat, + InstrItinClass itin> : + A64I_LDSTex_stt2n <size, + opcode{2}, 0, opcode{1}, opcode{0}, + outs, ins, + !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"), + pat, itin> { + let mayStore = 1; +} + + +multiclass A64I_SPex<string asmstr, bits<3> opcode> { + def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs), + (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2, + GPR64xsp0:$Rn), + [], NoItinerary>; + + def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs), + (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2, + GPR64xsp0:$Rn), + [], NoItinerary>; +} + +defm STXP : A64I_SPex<"stxp", 0b010>; +defm STLXP : A64I_SPex<"stlxp", 0b011>; + +//===---------------------------------- +// Load-exclusive pair (acquiring & normal) +//===---------------------------------- + +class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, + dag ins, list<dag> pat, + InstrItinClass itin> : + A64I_LDSTex_tt2n <size, + opcode{2}, 1, opcode{1}, opcode{0}, + outs, ins, + !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"), + pat, itin>{ + let mayLoad = 1; + let DecoderMethod = "DecodeLoadPairExclusiveInstruction"; + let PostEncoderMethod = "fixLoadStoreExclusive<0,1>"; +} + +multiclass A64I_LPex<string asmstr, bits<3> opcode> { + def _word: A64I_LPexs_impl<0b10, opcode, asmstr, + (outs GPR32:$Rt, GPR32:$Rt2), + (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _dword: A64I_LPexs_impl<0b11, opcode, asmstr, + (outs GPR64:$Rt, GPR64:$Rt2), + (ins GPR64xsp0:$Rn), + [], NoItinerary>; +} + +defm LDXP : A64I_LPex<"ldxp", 0b010>; +defm LDAXP : A64I_LPex<"ldaxp", 0b011>; + +//===----------------------------------------------------------------------===// +// Load-store register (unscaled immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: LDURB, LDURH, LDRUSB, LDRUSH, LDRUSW, STUR, STURB, STURH and PRFUM +// +// and +// 
+//===----------------------------------------------------------------------===// +// Load-store register (register offset) instructions +//===----------------------------------------------------------------------===// +// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register (unsigned immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register (immediate post-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register (immediate pre-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW + +// Note that patterns are much later on in a completely separate section (they +// need ADRPxi to be defined). + +//===------------------------------- +// 1. Various operands needed +//===------------------------------- + +//===------------------------------- +// 1.1 Unsigned 12-bit immediate operands +//===------------------------------- +// The addressing mode for these instructions consists of an unsigned 12-bit +// immediate which is scaled by the size of the memory access. +// +// We represent this in the MC layer by two operands: +// 1. A base register. +// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]" +// would have '1' in this field. +// This means that separate functions are needed for converting representations +// which *are* aware of the intended access size. + +// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to +// know the access size via some means. An isolated operand does not have this +// information unless told from here, which means we need separate tablegen +// Operands for each access size. This multiclass takes care of instantiating +// the correct template functions in the rest of the backend. + +//===------------------------------- +// 1.1 Unsigned 12-bit immediate operands +//===------------------------------- + +multiclass offsets_uimm12<int MemSize, string prefix> { + def uimm12_asmoperand : AsmOperandClass { + let Name = "OffsetUImm12_" # MemSize; + let PredicateMethod = "isOffsetUImm12<" # MemSize # ">"; + let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">"; + let DiagnosticType = "LoadStoreUImm12_" # MemSize; + } + + // Pattern is really no more than an ImmLeaf, but predicated on MemSize which + // complicates things beyond TableGen's ken. 
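// (Editorial note) Concretely, for the MemSize == 8 instantiation the MCInst
// stores the byte offset already divided by the access size: "ldr x0, [x1, #48]"
// carries UImm12 == 6, and an offset that is not a multiple of 8 fails
// isOffsetUImm12<8> and so cannot use this unsigned-immediate form.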
+ def uimm12 : Operand<i64>, + ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> { + let ParserMatchClass + = !cast<AsmOperandClass>(prefix # uimm12_asmoperand); + + let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">"; + let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">"; + } +} + +defm byte_ : offsets_uimm12<1, "byte_">; +defm hword_ : offsets_uimm12<2, "hword_">; +defm word_ : offsets_uimm12<4, "word_">; +defm dword_ : offsets_uimm12<8, "dword_">; +defm qword_ : offsets_uimm12<16, "qword_">; + +//===------------------------------- +// 1.1 Signed 9-bit immediate operands +//===------------------------------- + +// The MCInst is expected to store the bit-wise encoding of the value, +// which amounts to lopping off the extended sign bits. +def SDXF_simm9 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32); +}]>; + +def simm9_asmoperand : AsmOperandClass { + let Name = "SImm9"; + let PredicateMethod = "isSImm<9>"; + let RenderMethod = "addSImmOperands<9>"; + let DiagnosticType = "LoadStoreSImm9"; +} + +def simm9 : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }], + SDXF_simm9> { + let PrintMethod = "printOffsetSImm9Operand"; + let ParserMatchClass = simm9_asmoperand; +} + + +//===------------------------------- +// 1.3 Register offset extensions +//===------------------------------- + +// The assembly-syntax for these addressing-modes is: +// [<Xn|SP>, <R><m> {, <extend> {<amount>}}] +// +// The essential semantics are: +// + <amount> is a shift: #<log(transfer size)> or #0 +// + <R> can be W or X. +// + If <R> is W, <extend> can be UXTW or SXTW +// + If <R> is X, <extend> can be LSL or SXTX +// +// The trickiest of those constraints is that Rm can be either GPR32 or GPR64, +// which will need separate instructions for LLVM type-consistency. We'll also +// need separate operands, of course. +multiclass regexts<int MemSize, int RmSize, RegisterClass GPR, + string Rm, string prefix> { + def regext_asmoperand : AsmOperandClass { + let Name = "AddrRegExtend_" # MemSize # "_" # Rm; + let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">"; + let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">"; + let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize; + } + + def regext : Operand<i64> { + let PrintMethod + = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">"; + + let DecoderMethod = "DecodeAddrRegExtendOperand"; + let ParserMatchClass + = !cast<AsmOperandClass>(prefix # regext_asmoperand); + } +} + +multiclass regexts_wx<int MemSize, string prefix> { + // Rm is an X-register if LSL or SXTX are specified as the shift. + defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">; + + // Rm is a W-register if UXTW or SXTW are specified as the shift. + defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">; +} + +defm byte_ : regexts_wx<1, "byte_">; +defm hword_ : regexts_wx<2, "hword_">; +defm word_ : regexts_wx<4, "word_">; +defm dword_ : regexts_wx<8, "dword_">; +defm qword_ : regexts_wx<16, "qword_">; + + +//===------------------------------ +// 2. The instructions themselves. 
+//===------------------------------ + +// We have the following instructions to implement: +// | | B | H | W | X | +// |-----------------+-------+-------+-------+--------| +// | unsigned str | STRB | STRH | STR | STR | +// | unsigned ldr | LDRB | LDRH | LDR | LDR | +// | signed ldr to W | LDRSB | LDRSH | - | - | +// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) | + +// This will instantiate the LDR/STR instructions you'd expect to use for an +// unsigned datatype (first two rows above) or floating-point register, which is +// reasonably uniform across all access sizes. + + +//===------------------------------ +// 2.1 Regular instructions +//===------------------------------ + +// This class covers the basic unsigned or irrelevantly-signed loads and stores, +// to general-purpose and floating-point registers. + +class AddrParams<string prefix> { + Operand uimm12 = !cast<Operand>(prefix # "_uimm12"); + + Operand regextWm = !cast<Operand>(prefix # "_Wm_regext"); + Operand regextXm = !cast<Operand>(prefix # "_Xm_regext"); +} + +def byte_addrparams : AddrParams<"byte">; +def hword_addrparams : AddrParams<"hword">; +def word_addrparams : AddrParams<"word">; +def dword_addrparams : AddrParams<"dword">; +def qword_addrparams : AddrParams<"qword">; + +multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v, + bit high_opc, string asmsuffix, + RegisterClass GPR, AddrParams params> { + // Unsigned immediate + def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0}, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12), + "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayStore = 1; + } + def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1}, + (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12), + "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayLoad = 1; + } + def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + // Register offset (four of these: load/store and Wm/Xm). 
+ let mayLoad = 1 in { + def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0, + (outs GPR:$Rt), + (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), + "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1, + (outs GPR:$Rt), + (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext), + "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + } + def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]", + (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + let mayStore = 1 in { + def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm, + params.regextWm:$Ext), + "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm, + params.regextXm:$Ext), + "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + } + def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]", + (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + // Unaligned immediate + def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0}, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayStore = 1; + } + def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1}, + (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayLoad = 1; + } + def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + // Post-indexed + def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0}, + (outs GPR64xsp:$Rn_wb), + (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let mayStore = 1; + + // Decoder only needed for unpredictability checking (FIXME). + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1}, + (outs GPR:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + // Pre-indexed + def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0}, + (outs GPR64xsp:$Rn_wb), + (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let mayStore = 1; + + // Decoder only needed for unpredictability checking (FIXME). 
+ let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1}, + (outs GPR:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + +} + +// STRB/LDRB: First define the instructions +defm LS8 + : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>; + +// STRH/LDRH +defm LS16 + : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>; + + +// STR/LDR to/from a W register +defm LS32 + : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>; + +// STR/LDR to/from an X register +defm LS64 + : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>; + +// STR/LDR to/from a B register +defm LSFP8 + : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>; + +// STR/LDR to/from an H register +defm LSFP16 + : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>; + +// STR/LDR to/from an S register +defm LSFP32 + : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>; +// STR/LDR to/from a D register +defm LSFP64 + : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>; +// STR/LDR to/from a Q register +defm LSFP128 + : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, + qword_addrparams>; + +//===------------------------------ +// 2.3 Signed loads +//===------------------------------ + +// Byte and half-word signed loads can both go into either an X or a W register, +// so it's worth factoring out. Signed word loads don't fit because there is no +// W version. 
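Editorial note: as the comment above says, byte and half-word sign-extending loads have both W- and X-destination forms, while the word form only extends to X. For orientation, these are the instructions a compiler would typically select for plain C sign-extending loads (register numbers are illustrative, and exact output can vary):

    #include <stdint.h>

    int32_t load_s8_to_w (const int8_t  *p) { return *p; }  /* ldrsb wN, [xM] */
    int64_t load_s8_to_x (const int8_t  *p) { return *p; }  /* ldrsb xN, [xM] */
    int32_t load_s16_to_w(const int16_t *p) { return *p; }  /* ldrsh wN, [xM] */
    int64_t load_s16_to_x(const int16_t *p) { return *p; }  /* ldrsh xN, [xM] */
    int64_t load_s32_to_x(const int32_t *p) { return *p; }  /* ldrsw xN, [xM] */
    /* No "ldrsw wN" exists: a 32-bit load into a W register is just ldr. */
    int32_t load_s32_to_w(const int32_t *p) { return *p; }  /* ldr   wN, [xM] */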
+multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params, + string prefix> { + // Unsigned offset + def w : A64I_LSunsigimm<size, 0b0, 0b11, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, params.uimm12:$UImm12), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayLoad = 1; + } + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>; + + def x : A64I_LSunsigimm<size, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, params.uimm12:$UImm12), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayLoad = 1; + } + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>; + + // Register offset + let mayLoad = 1 in { + def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + } + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", + (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", + (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + + let mayLoad = 1 in { + // Unaligned offset + def w_U : A64I_LSunalimm<size, 0b0, 0b11, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary>; + + def x_U : A64I_LSunalimm<size, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary>; + + + // Post-indexed + def w_PostInd : A64I_LSpostind<size, 0b0, 0b11, + (outs GPR32:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def x_PostInd : A64I_LSpostind<size, 0b0, 0b10, + (outs GPR64:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + // Pre-indexed + def w_PreInd : A64I_LSpreind<size, 0b0, 0b11, + (outs GPR32:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def x_PreInd : A64I_LSpreind<size, 0b0, 0b10, + (outs GPR64:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + } // let mayLoad = 1 +} + +// LDRSB +defm 
LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">; +// LDRSH +defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">; + +// LDRSW: load a 32-bit register, sign-extending to 64-bits. +def LDRSWx + : A64I_LSunsigimm<0b10, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, word_uimm12:$UImm12), + "ldrsw\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayLoad = 1; +} +def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +let mayLoad = 1 in { + def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext), + "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext), + "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; +} +def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]", + (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>; + + +def LDURSWx + : A64I_LSunalimm<0b10, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldursw\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayLoad = 1; +} +def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +def LDRSWx_PostInd + : A64I_LSpostind<0b10, 0b0, 0b10, + (outs GPR64:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrsw\t$Rt, [$Rn], $SImm9", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; +} + +def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, + (outs GPR64:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrsw\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; +} + +//===------------------------------ +// 2.4 Prefetch operations +//===------------------------------ + +def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12), + "prfm\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayLoad = 1; +} +def : InstAlias<"prfm $Rt, [$Rn]", + (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; + +let mayLoad = 1 in { + def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, + GPR32:$Rm, dword_Wm_regext:$Ext), + "prfm\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, dword_Xm_regext:$Ext), + "prfm\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; +} + +def : InstAlias<"prfm $Rt, [$Rn, $Rm]", + (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + +def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "prfum\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayLoad = 1; +} +def : InstAlias<"prfum $Rt, [$Rn]", + (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; + +//===----------------------------------------------------------------------===// +// Load-store register (unprivileged) instructions +//===----------------------------------------------------------------------===// +// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH + +// These instructions very much mirror the "unscaled immediate" loads, but since +// there are no floating-point variants we need to split them out into their own +// section to avoid instantiation of "ldtr d0, [sp]" etc. 
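Editorial note: before the unprivileged variants, a quick usage sketch for the PRFM/PRFUM instructions defined just above. Current compilers typically map __builtin_prefetch onto PRFM as shown; the exact prfop chosen for each locality value is the usual mapping, not something this file mandates:

    /* (read/write, locality) -> prefetch operation, the common mapping. */
    void warm_read  (const char *p) { __builtin_prefetch(p, 0, 3); } /* prfm pldl1keep, [xN] */
    void warm_write (char *p)       { __builtin_prefetch(p, 1, 3); } /* prfm pstl1keep, [xN] */
    void stream_read(const char *p) { __builtin_prefetch(p, 0, 0); } /* prfm pldl1strm, [xN] */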
+ +multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR, + string prefix> { + def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayStore = 1; + } + + def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01, + (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayLoad = 1; + } + + def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + +} + +// STTRB/LDTRB: First define the instructions +defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">; + +// STTRH/LDTRH +defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">; + +// STTR/LDTR to/from a W register +defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">; + +// STTR/LDTR to/from an X register +defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">; + +// Now a class for the signed instructions that can go to either 32 or 64 +// bits... +multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> { + let mayLoad = 1 in { + def w : A64I_LSunpriv<size, 0b0, 0b11, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary>; + + def x : A64I_LSunpriv<size, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary>; + } + + def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>; + + def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +} + +// LDTRSB +defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">; +// LDTRSH +defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">; + +// And finally LDTRSW which only goes to 64 bits. +def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtrsw\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayLoad = 1; +} +def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +//===----------------------------------------------------------------------===// +// Load-store register pair (offset) instructions +//===----------------------------------------------------------------------===// +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register pair (post-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STP, LDP, LDPSW +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register pair (pre-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STP, LDP, LDPSW +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store non-temporal register pair (offset) instructions +//===----------------------------------------------------------------------===// +// Contains: STNP, LDNP + + +// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to +// know the access size via some means. 
An isolated operand does not have this +// information unless told from here, which means we need separate tablegen +// Operands for each access size. This multiclass takes care of instantiating +// the correct template functions in the rest of the backend. + +multiclass offsets_simm7<string MemSize, string prefix> { + // The bare signed 7-bit immediate is used in post-indexed instructions, but + // because of the scaling performed a generic "simm7" operand isn't + // appropriate here either. + def simm7_asmoperand : AsmOperandClass { + let Name = "SImm7_Scaled" # MemSize; + let PredicateMethod = "isSImm7Scaled<" # MemSize # ">"; + let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">"; + let DiagnosticType = "LoadStoreSImm7_" # MemSize; + } + + def simm7 : Operand<i64> { + let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">"; + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand"); + } +} + +defm word_ : offsets_simm7<"4", "word_">; +defm dword_ : offsets_simm7<"8", "dword_">; +defm qword_ : offsets_simm7<"16", "qword_">; + +multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg, + Operand simm7, string prefix> { + def _STR : A64I_LSPoffset<opc, v, 0b0, (outs), + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { + let mayStore = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"stp $Rt, $Rt2, [$Rn]", + (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _LDR : A64I_LSPoffset<opc, v, 0b1, + (outs SomeReg:$Rt, SomeReg:$Rt2), + (ins GPR64xsp:$Rn, simm7:$SImm7), + "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { + let mayLoad = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"ldp $Rt, $Rt2, [$Rn]", + (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0, + (outs GPR64xsp:$Rn_wb), + (ins SomeReg:$Rt, SomeReg:$Rt2, + GPR64xsp:$Rn, + simm7:$SImm7), + "stp\t$Rt, $Rt2, [$Rn], $SImm7", + [], NoItinerary> { + let mayStore = 1; + let Constraints = "$Rn = $Rn_wb"; + + // Decoder only needed for unpredictability checking (FIXME). 
+ let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1, + (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm7:$SImm7), + "ldp\t$Rt, $Rt2, [$Rn], $SImm7", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb), + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stp\t$Rt, $Rt2, [$Rn, $SImm7]!", + [], NoItinerary> { + let mayStore = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1, + (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm7:$SImm7), + "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs), + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { + let mayStore = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"stnp $Rt, $Rt2, [$Rn]", + (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1, + (outs SomeReg:$Rt, SomeReg:$Rt2), + (ins GPR64xsp:$Rn, simm7:$SImm7), + "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { + let mayLoad = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]", + (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + +} + + +defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">; +defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">; +defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">; +defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">; +defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, + "LSFPPair128">; + + +def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, + (outs GPR64:$Rt, GPR64:$Rt2), + (ins GPR64xsp:$Rn, word_simm7:$SImm7), + "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { + let mayLoad = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; +} +def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]", + (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>; + +def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1, + (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), + (ins GPR64xsp:$Rn, word_simm7:$SImm7), + "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; +} + +def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, + (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), + (ins GPR64xsp:$Rn, word_simm7:$SImm7), + "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; +} + +//===----------------------------------------------------------------------===// +// Logical (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV + +multiclass logical_imm_operands<string prefix, string note, + int size, ValueType VT> { + def _asmoperand : AsmOperandClass { + let Name = "LogicalImm" # note # size; + 
let PredicateMethod = "isLogicalImm" # note # "<" # size # ">"; + let RenderMethod = "addLogicalImmOperands<" # size # ">"; + let DiagnosticType = "LogicalSecondSource"; + } + + def _operand + : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> { + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); + let PrintMethod = "printLogicalImmOperand<" # size # ">"; + let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">"; + } +} + +defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>; +defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>; + +// The mov versions only differ in assembly parsing, where they +// exclude values representable with either MOVZ or MOVN. +defm logical_imm32_mov + : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>; +defm logical_imm64_mov + : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>; + + +multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> { + def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd), + (ins GPR32:$Rn, logical_imm32_operand:$Imm), + !strconcat(asmop, "\t$Rd, $Rn, $Imm"), + [(set i32:$Rd, + (opnode i32:$Rn, logical_imm32_operand:$Imm))], + NoItinerary>; + + def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd), + (ins GPR64:$Rn, logical_imm64_operand:$Imm), + !strconcat(asmop, "\t$Rd, $Rn, $Imm"), + [(set i64:$Rd, + (opnode i64:$Rn, logical_imm64_operand:$Imm))], + NoItinerary>; +} + +defm AND : A64I_logimmSizes<0b00, "and", and>; +defm ORR : A64I_logimmSizes<0b01, "orr", or>; +defm EOR : A64I_logimmSizes<0b10, "eor", xor>; + +let Defs = [NZCV] in { + def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd), + (ins GPR32:$Rn, logical_imm32_operand:$Imm), + "ands\t$Rd, $Rn, $Imm", + [], NoItinerary>; + + def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd), + (ins GPR64:$Rn, logical_imm64_operand:$Imm), + "ands\t$Rd, $Rn, $Imm", + [], NoItinerary>; +} + + +def : InstAlias<"tst $Rn, $Imm", + (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>; +def : InstAlias<"tst $Rn, $Imm", + (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>; +def : InstAlias<"mov $Rd, $Imm", + (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>; +def : InstAlias<"mov $Rd, $Imm", + (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>; + +//===----------------------------------------------------------------------===// +// Logical (shifted register) instructions +//===----------------------------------------------------------------------===// +// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV + +// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS" +// behaves differently for unsigned comparisons, so we defensively only allow +// signed or n/a as the operand. In practice "unsigned greater than 0" is "not +// equal to 0" and LLVM gives us this. +def signed_cond : PatLeaf<(cond), [{ + return !isUnsignedIntSetCC(N->get()); +}]>; + + +// These instructions share their "shift" operands with add/sub (shifted +// register instructions). They are defined there. + +// N.b. the commutable parameter is just !N. It will be first against the wall +// when the revolution comes. 
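Editorial note: before the logical_shifts multiclass, a short source-level illustration of what the N bit and the shared shift operand buy. Each of these C expressions typically selects the single instruction noted in the comment, matching the BIC/ORN/EON PatFrags and the shifted-operand patterns defined in this section (compiler output can of course vary):

    #include <stdint.h>

    uint64_t and_not(uint64_t a, uint64_t b) { return a & ~b; }   /* bic xD, xA, xB */
    uint64_t or_not (uint64_t a, uint64_t b) { return a | ~b; }   /* orn xD, xA, xB */
    uint64_t xor_not(uint64_t a, uint64_t b) { return a ^ ~b; }   /* eon xD, xA, xB */
    uint64_t and_shl(uint64_t a, uint64_t b) {
      return a & (b << 4);                    /* and xD, xA, xB, lsl #4 */
    }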
+multiclass logical_shifts<string prefix, bit sf, bits<2> opc, + bit N, bit commutable, + string asmop, SDPatternOperator opfrag, ValueType ty, + RegisterClass GPR, list<Register> defs> { + let isCommutable = commutable, Defs = defs in { + def _lsl : A64I_logicalshift<sf, opc, 0b00, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6)) + )], + NoItinerary>; + + def _lsr : A64I_logicalshift<sf, opc, 0b01, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsr_operand_" # ty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm, + !cast<Operand>("lsr_operand_" # ty):$Imm6)) + )], + NoItinerary>; + + def _asr : A64I_logicalshift<sf, opc, 0b10, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("asr_operand_" # ty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm, + !cast<Operand>("asr_operand_" # ty):$Imm6)) + )], + NoItinerary>; + + def _ror : A64I_logicalshift<sf, opc, 0b11, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("ror_operand_" # ty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm, + !cast<Operand>("ror_operand_" # ty):$Imm6)) + )], + NoItinerary>; + } + + def _noshift + : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn, + GPR:$Rm, 0)>; + + def : Pat<(opfrag ty:$Rn, ty:$Rm), + (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>; +} + +multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable, + string asmop, SDPatternOperator opfrag, + list<Register> defs> { + defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N, + commutable, asmop, opfrag, i64, GPR64, defs>; + defm www : logical_shifts<prefix # "www", 0b0, opc, N, + commutable, asmop, opfrag, i32, GPR32, defs>; +} + + +defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>; +defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>; +defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>; +defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands", + PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), + [{ (void)N; return false; }]>, + [NZCV]>; + +defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic", + PatFrag<(ops node:$lhs, node:$rhs), + (and node:$lhs, (not node:$rhs))>, []>; +defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn", + PatFrag<(ops node:$lhs, node:$rhs), + (or node:$lhs, (not node:$rhs))>, []>; +defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon", + PatFrag<(ops node:$lhs, node:$rhs), + (xor node:$lhs, (not node:$rhs))>, []>; +defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics", + PatFrag<(ops node:$lhs, node:$rhs), + (and node:$lhs, (not node:$rhs)), + [{ (void)N; return false; }]>, + [NZCV]>; + +multiclass tst_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> { + let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in { + def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + + def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsr_operand_" # 
ty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm, + !cast<Operand>("lsr_operand_" # ty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("asr_operand_" # ty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm, + !cast<Operand>("asr_operand_" # ty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("ror_operand_" # ty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm, + !cast<Operand>("ror_operand_" # ty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + } + + def _noshift : InstAlias<"tst $Rn, $Rm", + (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; + + def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond), + (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>; +} + +defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>; +defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>; + + +multiclass mvn_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> { + let isCommutable = 0, Rn = 0b11111 in { + def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set ty:$Rd, (not (shl ty:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6)))], + NoItinerary>; + + + def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("lsr_operand_" # ty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set ty:$Rd, (not (srl ty:$Rm, + !cast<Operand>("lsr_operand_" # ty):$Imm6)))], + NoItinerary>; + + def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("asr_operand_" # ty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set ty:$Rd, (not (sra ty:$Rm, + !cast<Operand>("asr_operand_" # ty):$Imm6)))], + NoItinerary>; + + def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("ror_operand_" # ty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set ty:$Rd, (not (rotr ty:$Rm, + !cast<Operand>("lsl_operand_" # ty):$Imm6)))], + NoItinerary>; + } + + def _noshift : InstAlias<"mvn $Rn, $Rm", + (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; + + def : Pat<(not ty:$Rm), + (!cast<Instruction>(prefix # "_lsl") $Rm, 0)>; +} + +defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>; +defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>; + +def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; +def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; + +//===----------------------------------------------------------------------===// +// Move wide (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: MOVN, MOVZ, MOVK + MOV aliases + +// A wide variety of different relocations are needed for variants of these +// instructions, so it turns out that we need a different operand for all of +// them. 
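Editorial note: relocations aside, the basic job of these instructions is to build a constant 16 bits at a time. A small, deliberately non-optimal C sketch of that expansion (a real assembler, or the backend's own constant materialization, is smarter about which chunks to emit):

    #include <stdint.h>
    #include <stdio.h>

    /* Emit one movz plus a movk for each further non-zero 16-bit chunk. */
    static void print_movz_movk(uint64_t value) {
      for (int shift = 0; shift < 64; shift += 16) {
        unsigned chunk = (unsigned)((value >> shift) & 0xFFFF);
        if (shift == 0)
          printf("movz x0, #0x%x\n", chunk);
        else if (chunk != 0)
          printf("movk x0, #0x%x, lsl #%d\n", chunk, shift);
      }
    }

    int main(void) {
      print_movz_movk(UINT64_C(0x1234000056780000));
      /* movz x0, #0x0
         movk x0, #0x5678, lsl #16
         movk x0, #0x1234, lsl #48 */
      return 0;
    }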
+multiclass movw_operands<string prefix, string instname, int width> { + def _imm_asmoperand : AsmOperandClass { + let Name = instname # width # "Shifted" # shift; + let PredicateMethod = "is" # instname # width # "Imm"; + let RenderMethod = "addMoveWideImmOperands"; + let ParserMethod = "ParseImmWithLSLOperand"; + let DiagnosticType = "MOVWUImm16"; + } + + def _imm : Operand<i32> { + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand"); + let PrintMethod = "printMoveWideImmOperand"; + let EncoderMethod = "getMoveWideImmOpValue"; + let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">"; + + let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); + } +} + +defm movn32 : movw_operands<"movn32", "MOVN", 32>; +defm movn64 : movw_operands<"movn64", "MOVN", 64>; +defm movz32 : movw_operands<"movz32", "MOVZ", 32>; +defm movz64 : movw_operands<"movz64", "MOVZ", 64>; +defm movk32 : movw_operands<"movk32", "MOVK", 32>; +defm movk64 : movw_operands<"movk64", "MOVK", 64>; + +multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit, + dag ins64bit> { + + def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit, + !strconcat(asmop, "\t$Rd, $FullImm"), + [], NoItinerary> { + bits<18> FullImm; + let UImm16 = FullImm{15-0}; + let Shift = FullImm{17-16}; + } + + def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit, + !strconcat(asmop, "\t$Rd, $FullImm"), + [], NoItinerary> { + bits<18> FullImm; + let UImm16 = FullImm{15-0}; + let Shift = FullImm{17-16}; + } +} + +let isMoveImm = 1, isReMaterializable = 1, + isAsCheapAsAMove = 1, hasSideEffects = 0 in { + defm MOVN : A64I_movwSizes<0b00, "movn", + (ins movn32_imm:$FullImm), + (ins movn64_imm:$FullImm)>; + + // Some relocations are able to convert between a MOVZ and a MOVN. If these + // are applied the instruction must be emitted with the corresponding bits as + // 0, which means a MOVZ needs to override that bit from the default. + let PostEncoderMethod = "fixMOVZ" in + defm MOVZ : A64I_movwSizes<0b10, "movz", + (ins movz32_imm:$FullImm), + (ins movz64_imm:$FullImm)>; +} + +let Constraints = "$src = $Rd" in +defm MOVK : A64I_movwSizes<0b11, "movk", + (ins GPR32:$src, movk32_imm:$FullImm), + (ins GPR64:$src, movk64_imm:$FullImm)>; + + +// And now the "MOV" aliases. These also need their own operands because what +// they accept is completely different to what the base instructions accept. +multiclass movalias_operand<string prefix, string basename, + string immpredicate, int width> { + def _asmoperand : AsmOperandClass { + let Name = basename # width # "MovAlias"; + let PredicateMethod + = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">"; + let RenderMethod + = "addMoveWideMovAliasOperands<" # width # ", " + # "A64Imms::" # immpredicate # ">"; + } + + def _movimm : Operand<i32> { + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); + + let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); + } +} + +defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>; +defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>; +defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>; +defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>; + +// FIXME: these are officially canonical aliases, but TableGen is too limited to +// print them at the moment. I believe in this case an "AliasPredicate" method +// will need to be implemented. to allow it, as well as the more generally +// useful handling of non-register, non-constant operands. 
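Editorial note: the MOV aliases defined next pick between MOVZ, MOVN and the ORR-immediate form depending on the immediate. A very rough C sketch of that decision for the 32-bit case; the real predicates are the isMOVZImm/isOnlyMOVNImm and logical-immediate checks referenced in these operand classes, and the helpers below are invented purely for illustration:

    #include <stdbool.h>
    #include <stdint.h>

    /* True if v is a single 16-bit chunk in either half (the rest zero). */
    static bool isMovz32(uint32_t v) {
      return (v & ~0xFFFFu) == 0 || (v & 0xFFFFu) == 0;
    }

    static const char *classifyMov32(uint32_t v) {
      if (isMovz32(v))   return "movz";
      if (isMovz32(~v))  return "movn";
      return "orr Wd, wzr, #imm (if a valid logical immediate)";
    }
    /* classifyMov32(0x12340000) -> "movz"
       classifyMov32(0xFFFF1234) -> "movn"
       classifyMov32(0xFF00FF00) -> orr form (bitmask immediate) */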
+class movalias<Instruction INST, RegisterClass GPR, Operand operand> + : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>; + +def : movalias<MOVZwii, GPR32, movz32_movimm>; +def : movalias<MOVZxii, GPR64, movz64_movimm>; +def : movalias<MOVNwii, GPR32, movn32_movimm>; +def : movalias<MOVNxii, GPR64, movn64_movimm>; + +//===----------------------------------------------------------------------===// +// PC-relative addressing instructions +//===----------------------------------------------------------------------===// +// Contains: ADR, ADRP + +def adr_label : Operand<i64> { + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>"; + + // This label is a 21-bit offset from PC, unscaled + let PrintMethod = "printLabelOperand<21, 1>"; + let ParserMatchClass = label_asmoperand<21, 1>; + let OperandType = "OPERAND_PCREL"; +} + +def adrp_label_asmoperand : AsmOperandClass { + let Name = "AdrpLabel"; + let RenderMethod = "addLabelOperands<21, 4096>"; + let DiagnosticType = "Label"; +} + +def adrp_label : Operand<i64> { + let EncoderMethod = "getAdrpLabelOpValue"; + + // This label is a 21-bit offset from PC, scaled by the page-size: 4096. + let PrintMethod = "printLabelOperand<21, 4096>"; + let ParserMatchClass = adrp_label_asmoperand; + let OperandType = "OPERAND_PCREL"; +} + +let hasSideEffects = 0 in { + def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label), + "adr\t$Rd, $Label", [], NoItinerary>; + + def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label), + "adrp\t$Rd, $Label", [], NoItinerary>; +} + +//===----------------------------------------------------------------------===// +// System instructions +//===----------------------------------------------------------------------===// +// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS +// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL + +// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values. +def uimm3_asmoperand : AsmOperandClass { + let Name = "UImm3"; + let PredicateMethod = "isUImm<3>"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm3"; +} + +def uimm3 : Operand<i32> { + let ParserMatchClass = uimm3_asmoperand; +} + +// The HINT alias can accept a simple unsigned 7-bit immediate. +def uimm7_asmoperand : AsmOperandClass { + let Name = "UImm7"; + let PredicateMethod = "isUImm<7>"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm7"; +} + +def uimm7 : Operand<i32> { + let ParserMatchClass = uimm7_asmoperand; +} + +// Multiclass namedimm is defined with the prefetch operands. Most of these fit +// into the NamedImmMapper scheme well: they either accept a named operand or +// any immediate under a particular value (which may be 0, implying no immediate +// is allowed). +defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">; +defm isb : namedimm<"isb", "A64ISB::ISBMapper">; +defm ic : namedimm<"ic", "A64IC::ICMapper">; +defm dc : namedimm<"dc", "A64DC::DCMapper">; +defm at : namedimm<"at", "A64AT::ATMapper">; +defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">; + +// However, MRS and MSR are more complicated for a few reasons: +// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an +// implementation-defined effect +// * Most registers are shared, but some are read-only or write-only. +// * There is a variant of MSR which accepts the same register name (SPSel), +// but which would have a different encoding. 
+ +// In principle these could be resolved in with more complicated subclasses of +// NamedImmMapper, however that imposes an overhead on other "named +// immediates". Both in concrete terms with virtual tables and in unnecessary +// abstraction. + +// The solution adopted here is to take the MRS/MSR Mappers out of the usual +// hierarchy (they're not derived from NamedImmMapper) and to add logic for +// their special situation. +def mrs_asmoperand : AsmOperandClass { + let Name = "MRS"; + let ParserMethod = "ParseSysRegOperand"; + let DiagnosticType = "MRS"; +} + +def mrs_op : Operand<i32> { + let ParserMatchClass = mrs_asmoperand; + let PrintMethod = "printMRSOperand"; + let DecoderMethod = "DecodeMRSOperand"; +} + +def msr_asmoperand : AsmOperandClass { + let Name = "MSRWithReg"; + + // Note that SPSel is valid for both this and the pstate operands, but with + // different immediate encodings. This is why these operands provide a string + // AArch64Operand rather than an immediate. The overlap is small enough that + // it could be resolved with hackery now, but who can say in future? + let ParserMethod = "ParseSysRegOperand"; + let DiagnosticType = "MSR"; +} + +def msr_op : Operand<i32> { + let ParserMatchClass = msr_asmoperand; + let PrintMethod = "printMSROperand"; + let DecoderMethod = "DecodeMSROperand"; +} + +def pstate_asmoperand : AsmOperandClass { + let Name = "MSRPState"; + // See comment above about parser. + let ParserMethod = "ParseSysRegOperand"; + let DiagnosticType = "MSR"; +} + +def pstate_op : Operand<i32> { + let ParserMatchClass = pstate_asmoperand; + let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>"; + let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>"; +} + +// When <CRn> is specified, an assembler should accept something like "C4", not +// the usual "#4" immediate. +def CRx_asmoperand : AsmOperandClass { + let Name = "CRx"; + let PredicateMethod = "isUImm<4>"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "ParseCRxOperand"; + // Diagnostics are handled in all cases by ParseCRxOperand. +} + +def CRx : Operand<i32> { + let ParserMatchClass = CRx_asmoperand; + let PrintMethod = "printCRxOperand"; +} + + +// Finally, we can start defining the instructions. + +// HINT is straightforward, with a few aliases. +def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7", + [], NoItinerary> { + bits<7> UImm7; + let CRm = UImm7{6-3}; + let Op2 = UImm7{2-0}; + + let Op0 = 0b00; + let Op1 = 0b011; + let CRn = 0b0010; + let Rt = 0b11111; +} + +def : InstAlias<"nop", (HINTi 0)>; +def : InstAlias<"yield", (HINTi 1)>; +def : InstAlias<"wfe", (HINTi 2)>; +def : InstAlias<"wfi", (HINTi 3)>; +def : InstAlias<"sev", (HINTi 4)>; +def : InstAlias<"sevl", (HINTi 5)>; + +// Quite a few instructions then follow a similar pattern of fixing common +// fields in the bitpattern, we'll define a helper-class for them. 
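The MRSxi/MSRix definitions further down split a single 16-bit system-register operand into the five instruction fields. A small C++ sketch (illustrative only, not from the committed sources) of that split, checked against 0xde82, the value the A64threadpointer pattern later in this file hands to MRSxi; it decodes to S3_3_C13_C0_2, the usual encoding of TPIDR_EL0, the EL0 thread pointer.

    // sysreg_fields.cpp : illustrative only, not part of the patch.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    struct SysRegFields { unsigned Op0, Op1, CRn, CRm, Op2; };

    // Mirror of the bit slices used by MRSxi/MSRix:
    //   Op0 = SysReg{15-14}, Op1 = SysReg{13-11}, CRn = SysReg{10-7},
    //   CRm = SysReg{6-3},   Op2 = SysReg{2-0}.
    static SysRegFields splitSysReg(uint16_t SysReg) {
      SysRegFields F;
      F.Op0 = (SysReg >> 14) & 0x3;
      F.Op1 = (SysReg >> 11) & 0x7;
      F.CRn = (SysReg >> 7) & 0xf;
      F.CRm = (SysReg >> 3) & 0xf;
      F.Op2 = SysReg & 0x7;
      return F;
    }

    int main() {
      SysRegFields F = splitSysReg(0xde82);
      assert(F.Op0 == 3 && F.Op1 == 3 && F.CRn == 13 && F.CRm == 0 && F.Op2 == 2);
      printf("S%u_%u_C%u_C%u_%u\n", F.Op0, F.Op1, F.CRn, F.CRm, F.Op2);
      return 0;
    }

The simple_sys helper that follows plays the same game for CLREX, DSB, DMB and ISB, fixing Op0/Op1/CRn/Op2 and leaving only CRm as an operand.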
+class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2, + Operand operand, string asmop> + : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"), + [], NoItinerary> { + let Op0 = op0; + let Op1 = op1; + let CRn = crn; + let Op2 = op2; + let Rt = 0b11111; +} + + +def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">; +def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">; +def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">; +def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">; + +def : InstAlias<"clrex", (CLREXi 0b1111)>; +def : InstAlias<"isb", (ISBi 0b1111)>; + +// (DMBi 0xb) is a "DMB ISH" instruciton, appropriate for Linux SMP +// configurations at least. +def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>; + +// Any SYS bitpattern can be represented with a complex and opaque "SYS" +// instruction. +def SYSiccix : A64I_system<0b0, (outs), + (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, + uimm3:$Op2, GPR64:$Rt), + "sys\t$Op1, $CRn, $CRm, $Op2, $Rt", + [], NoItinerary> { + let Op0 = 0b01; +} + +// You can skip the Xt argument whether it makes sense or not for the generic +// SYS instruction. +def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2", + (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>; + + +// But many have aliases, which obviously don't fit into +class SYSalias<dag ins, string asmstring> + : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> { + let isAsmParserOnly = 1; + + bits<14> SysOp; + let Op0 = 0b01; + let Op1 = SysOp{13-11}; + let CRn = SysOp{10-7}; + let CRm = SysOp{6-3}; + let Op2 = SysOp{2-0}; +} + +def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">; + +def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> { + let Rt = 0b11111; +} + +def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">; +def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">; + +def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">; + +def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> { + let Rt = 0b11111; +} + + +def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt), + (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2), + "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2", + [], NoItinerary> { + let Op0 = 0b01; +} + +// The instructions themselves are rather simple for MSR and MRS. +def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt), + "msr\t$SysReg, $Rt", [], NoItinerary> { + bits<16> SysReg; + let Op0 = SysReg{15-14}; + let Op1 = SysReg{13-11}; + let CRn = SysReg{10-7}; + let CRm = SysReg{6-3}; + let Op2 = SysReg{2-0}; +} + +def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg), + "mrs\t$Rt, $SysReg", [], NoItinerary> { + bits<16> SysReg; + let Op0 = SysReg{15-14}; + let Op1 = SysReg{13-11}; + let CRn = SysReg{10-7}; + let CRm = SysReg{6-3}; + let Op2 = SysReg{2-0}; +} + +def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm), + "msr\t$PState, $CRm", [], NoItinerary> { + bits<6> PState; + + let Op0 = 0b00; + let Op1 = PState{5-3}; + let CRn = 0b0100; + let Op2 = PState{2-0}; + let Rt = 0b11111; +} + +//===----------------------------------------------------------------------===// +// Test & branch (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: TBZ, TBNZ + +// The bit to test is a simple unsigned 6-bit immediate in the X-register +// versions. 
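TBZ/TBNZ test one bit and branch, and the DAG patterns below match that as "AND with a power of two compared against zero". A short C++ sketch (not from the committed sources) of the arithmetic involved; reducing the mask to a bit index is an assumption about what the SelectTSTBOperand complex patterns compute, based on the instruction semantics rather than on code shown here.

    // tb_bit_test.cpp : illustrative only, not part of the patch.
    #include <cassert>
    #include <cstdint>

    // A mask is a TBZ/TBNZ candidate when it has exactly one bit set; the
    // instruction immediate is then the index of that bit.
    static bool isSingleBit(uint64_t Mask) {
      return Mask != 0 && (Mask & (Mask - 1)) == 0;
    }
    static unsigned bitIndex(uint64_t Mask) {
      unsigned B = 0;
      while (!(Mask & 1)) { Mask >>= 1; ++B; }
      return B;
    }

    // Branch conditions modelled by the patterns below:
    //   tbz  Rt, #b, label   taken iff bit b of Rt is 0   (the A64eq form)
    //   tbnz Rt, #b, label   taken iff bit b of Rt is 1   (the A64ne form)
    static bool tbzTaken(uint64_t Rt, unsigned b)  { return ((Rt >> b) & 1) == 0; }
    static bool tbnzTaken(uint64_t Rt, unsigned b) { return ((Rt >> b) & 1) == 1; }

    int main() {
      uint64_t Mask = 0x40;                 // e.g. IR testing "(x & 64) == 0"
      assert(isSingleBit(Mask));
      unsigned b = bitIndex(Mask);          // 6: fits the 6-bit immediate field
      assert(b == 6);
      assert(tbzTaken(0x00, b) && tbnzTaken(0x40, b));
      return 0;
    }

The uimm6 operand and the 14-bit, instruction-scaled label operand defined next carry the bit number and the branch target.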
+def uimm6 : Operand<i64> { + let ParserMatchClass = uimm6_asmoperand; +} + +def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>; + +def tbimm_target : Operand<OtherVT> { + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>"; + + // This label is a 14-bit offset from PC, scaled by the instruction-width: 4. + let PrintMethod = "printLabelOperand<14, 4>"; + let ParserMatchClass = label_wid14_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>; +def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>; + +// These instructions correspond to patterns involving "and" with a power of +// two, which we need to be able to select. +def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">; +def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">; + +let isBranch = 1, isTerminator = 1 in { + def TBZxii : A64I_TBimm<0b0, (outs), + (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), + "tbz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0), + A64eq, bb:$Label)], + NoItinerary>; + + def TBNZxii : A64I_TBimm<0b1, (outs), + (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), + "tbnz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0), + A64ne, bb:$Label)], + NoItinerary>; + + + // Note, these instructions overlap with the above 64-bit patterns. This is + // intentional, "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would both + // do the same thing and are both permitted assembly. They also both have + // sensible DAG patterns. + def TBZwii : A64I_TBimm<0b0, (outs), + (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), + "tbz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0), + A64eq, bb:$Label)], + NoItinerary> { + let Imm{5} = 0b0; + } + + def TBNZwii : A64I_TBimm<0b1, (outs), + (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), + "tbnz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0), + A64ne, bb:$Label)], + NoItinerary> { + let Imm{5} = 0b0; + } +} + +//===----------------------------------------------------------------------===// +// Unconditional branch (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: B, BL + +def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>; + +def bimm_target : Operand<OtherVT> { + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>"; + + // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. + let PrintMethod = "printLabelOperand<26, 4>"; + let ParserMatchClass = label_wid26_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +def blimm_target : Operand<i64> { + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>"; + + // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. 
+ let PrintMethod = "printLabelOperand<26, 4>"; + let ParserMatchClass = label_wid26_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type> + : A64I_Bimm<op, (outs), (ins lbl_type:$Label), + !strconcat(asmop, "\t$Label"), patterns, + NoItinerary>; + +let isBranch = 1 in { + def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> { + let isTerminator = 1; + let isBarrier = 1; + } + + def BLimm : A64I_BimmImpl<0b1, "bl", + [(AArch64Call tglobaladdr:$Label)], blimm_target> { + let isCall = 1; + let Defs = [X30]; + } +} + +def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>; + +//===----------------------------------------------------------------------===// +// Unconditional branch (register) instructions +//===----------------------------------------------------------------------===// +// Contains: BR, BLR, RET, ERET, DRP. + +// Most of the notional opcode fields in the A64I_Breg format are fixed in A64 +// at the moment. +class A64I_BregImpl<bits<4> opc, + dag outs, dag ins, string asmstr, list<dag> patterns, + InstrItinClass itin = NoItinerary> + : A64I_Breg<opc, 0b11111, 0b000000, 0b00000, + outs, ins, asmstr, patterns, itin> { + let isBranch = 1; + let isIndirectBranch = 1; +} + +// Note that these are not marked isCall or isReturn because as far as LLVM is +// concerned they're not. "ret" is just another jump unless it has been selected +// by LLVM as the function's return. + +let isBranch = 1 in { + def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn), + "br\t$Rn", [(brind i64:$Rn)]> { + let isBarrier = 1; + let isTerminator = 1; + } + + def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn), + "blr\t$Rn", [(AArch64Call i64:$Rn)]> { + let isBarrier = 0; + let isCall = 1; + let Defs = [X30]; + } + + def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn), + "ret\t$Rn", []> { + let isBarrier = 1; + let isTerminator = 1; + let isReturn = 1; + } + + // Create a separate pseudo-instruction for codegen to use so that we don't + // flag x30 as used in every function. It'll be restored before the RET by the + // epilogue if it's legitimately used. + def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> { + let isTerminator = 1; + let isBarrier = 1; + let isReturn = 1; + } + + def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> { + let Rn = 0b11111; + let isBarrier = 1; + let isTerminator = 1; + let isReturn = 1; + } + + def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> { + let Rn = 0b11111; + let isBarrier = 1; + } +} + +def RETAlias : InstAlias<"ret", (RETx X30)>; + + +//===----------------------------------------------------------------------===// +// Address generation patterns +//===----------------------------------------------------------------------===// + +// Primary method of address generation for the small/absolute memory model is +// an ADRP/ADR pair: +// ADRP x0, some_variable +// ADD x0, x0, #:lo12:some_variable +// +// The load/store elision of the ADD is accomplished when selecting +// addressing-modes. This just mops up the cases where that doesn't work and we +// really need an address in some register. + +// This wrapper applies a LO12 modifier to the address. Otherwise we could just +// use the same address. 
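The ADRP/ADD split described above is easy to model: ADRP materialises the 4KB page of the target through a 21-bit, page-scaled PC-relative offset (roughly plus or minus 4GiB of reach), and the low 12 bits come from the paired "add ..., #:lo12:sym" or from a load/store's offset field. A minimal C++ sketch, not from the committed sources:

    // adrp_lo12.cpp : illustrative only, not part of the patch.
    #include <cassert>
    #include <cstdint>

    static uint64_t pageOf(uint64_t Addr) { return Addr & ~uint64_t(0xfff); }
    static uint64_t lo12Of(uint64_t Addr) { return Addr & 0xfff; }

    int main() {
      uint64_t Sym = 0x47abcdULL;
      uint64_t adrpResult = pageOf(Sym);          // what ADRP leaves in Xd
      assert(adrpResult + lo12Of(Sym) == Sym);    // ADD #:lo12: completes it
      return 0;
    }

The ADRP_ADD pattern class below pins this pairing down for the various symbol flavours (block addresses, globals, TLS globals, jump tables and external symbols).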
+ +class ADRP_ADD<SDNode Wrapper, SDNode addrop> + : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)), + (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>; + +def : ADRP_ADD<A64WrapperSmall, tblockaddress>; +def : ADRP_ADD<A64WrapperSmall, texternalsym>; +def : ADRP_ADD<A64WrapperSmall, tglobaladdr>; +def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>; +def : ADRP_ADD<A64WrapperSmall, tjumptable>; + +//===----------------------------------------------------------------------===// +// GOT access patterns +//===----------------------------------------------------------------------===// + +// FIXME: Wibble + +class GOTLoadSmall<SDNode addrfrag> + : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)), + (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>; + +def : GOTLoadSmall<texternalsym>; +def : GOTLoadSmall<tglobaladdr>; +def : GOTLoadSmall<tglobaltlsaddr>; + +//===----------------------------------------------------------------------===// +// Tail call handling +//===----------------------------------------------------------------------===// + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in { + def TC_RETURNdi + : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff), + [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>; + + def TC_RETURNxi + : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), + [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>; +} + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + Uses = [XSP] in { + def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [], + (Bimm bimm_target:$Label)>; + + def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [], + (BRx GPR64:$Rd)>; +} + + +def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), + (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>; + +//===----------------------------------------------------------------------===// +// Thread local storage +//===----------------------------------------------------------------------===// + +// This is a pseudo-instruction representing the ".tlsdesccall" directive in +// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the +// current location. It should always be immediately followed by a BLR +// instruction, and is intended solely for relaxation by the linker. + +def : Pat<(A64threadpointer), (MRSxi 0xde82)>; + +def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> { + let hasSideEffects = 1; +} + +def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var), + [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> { + let isCall = 1; + let Defs = [X30]; +} + +def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var), + (TLSDESC_BLRx $Rn, texternalsym:$Var)>; + +//===----------------------------------------------------------------------===// +// Bitfield patterns +//===----------------------------------------------------------------------===// + +def bfi32_lsb_to_immr : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64); +}]>; + +def bfi64_lsb_to_immr : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64); +}]>; + +def bfi_width_to_imms : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64); +}]>; + + +// The simpler patterns deal with cases where no AND mask is actually needed +// (either all bits are used or the low 32 bits are used). 
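The three SDNodeXForms above convert a (lsb, width) pair into the instruction's (ImmR, ImmS) fields: ImmR = (regsize - lsb) % regsize and ImmS = width - 1, which matches the architectural BFI-to-BFM alias. A self-contained C++ sketch (illustrative only, not from the committed sources) of the transforms plus a software model of the insert itself:

    // bfi_imms.cpp : illustrative only, not part of the patch.
    #include <cassert>
    #include <cstdint>

    static unsigned lsbToImmR(unsigned Lsb, unsigned RegSize) {
      return (RegSize - Lsb) % RegSize;          // bfi32/64_lsb_to_immr above
    }
    static unsigned widthToImmS(unsigned Width) {
      return Width - 1;                          // bfi_width_to_imms above
    }

    // Software model of "bfi xd, xn, #lsb, #width": insert the low Width bits
    // of Rn into Rd starting at bit Lsb, leaving the rest of Rd alone.
    static uint64_t bfi(uint64_t Rd, uint64_t Rn, unsigned Lsb, unsigned Width) {
      uint64_t Mask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
      return (Rd & ~(Mask << Lsb)) | ((Rn & Mask) << Lsb);
    }

    int main() {
      // bfi w0, w1, #8, #4 on a 32-bit register:
      assert(lsbToImmR(8, 32) == 24 && widthToImmS(4) == 3);
      assert(bfi(0xffffffffULL, 0x5ULL, 8, 4) == 0xfffff5ffULL);
      return 0;
    }

The AddedComplexity block that follows applies exactly these transforms, with a sub_32 detour for the masked 64-bit case.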
+let AddedComplexity = 10 in { + +def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS), + (BFIxxii $src, $Rn, + (bfi64_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS)))>; + +def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS), + (BFIwwii $src, $Rn, + (bfi32_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS)))>; + + +def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS), + (i64 4294967295)), + (SUBREG_TO_REG (i64 0), + (BFIwwii (EXTRACT_SUBREG $src, sub_32), + (EXTRACT_SUBREG $Rn, sub_32), + (bfi32_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS))), + sub_32)>; + +} + +//===----------------------------------------------------------------------===// +// Miscellaneous patterns +//===----------------------------------------------------------------------===// + +// Truncation from 64 to 32-bits just involves renaming your register. +def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>; + +// Similarly, extension where we don't care about the high bits is +// just a rename. +def : Pat<(i64 (anyext i32:$val)), + (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>; + +// SELECT instructions providing f128 types need to be handled by a +// pseudo-instruction since the eventual code will need to introduce basic +// blocks and control flow. +def F128CSEL : PseudoInst<(outs FPR128:$Rd), + (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond), + [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> { + let Uses = [NZCV]; + let usesCustomInserter = 1; +} + +//===----------------------------------------------------------------------===// +// Load/store patterns +//===----------------------------------------------------------------------===// + +// There are lots of patterns here, because we need to allow at least three +// parameters to vary independently. +// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ... +// 2. LLVM source: zextloadi8, anyextloadi8, ... +// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ... +// +// The biggest problem turns out to be the address-generation variable. At the +// point of instantiation we need to produce two DAGs, one for the pattern and +// one for the instruction. Doing this at the lowest level of classes doesn't +// work. +// +// Consider the simple uimm12 addressing mode, and the desire to match both (add +// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the +// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or +// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this +// operation, and PatFrags are for selection not output. +// +// As a result, the address-generation patterns are the final +// instantiations. However, we do still need to vary the operand for the address +// further down (At the point we're deciding A64WrapperSmall, we don't know +// the memory width of the operation). + +//===------------------------------ +// 1. Basic infrastructural defs +//===------------------------------ + +// First, some simple classes for !foreach and !subst to use: +class Decls { + dag pattern; +} + +def decls : Decls; +def ALIGN; +def INST; +def OFFSET; +def SHIFT; + +// You can't use !subst on an actual immediate, but you *can* use it on an +// operand record that happens to match a single immediate. So we do. 
+def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>; +def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>; +def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>; +def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>; +def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>; + +// If the low bits of a pointer are known to be 0 then an "or" is just as good +// as addition for computing an offset. This fragment forwards that check for +// TableGen's use. +def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs), +[{ + return CurDAG->isBaseWithConstantOffset(SDValue(N, 0)); +}]>; + +// Load/store (unsigned immediate) operations with relocations against global +// symbols (for lo12) are only valid if those symbols have correct alignment +// (since the immediate offset is divided by the access scale, it can't have a +// remainder). +// +// The guaranteed alignment is provided as part of the WrapperSmall +// operation, and checked against one of these. +def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>; +def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>; +def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>; +def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>; +def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>; + +// "Normal" load/store instructions can be used on atomic operations, provided +// the ordering parameter is at most "monotonic". Anything above that needs +// special handling with acquire/release instructions. +class simple_load<PatFrag base> + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic; +}]>; + +def atomic_load_simple_i8 : simple_load<atomic_load_8>; +def atomic_load_simple_i16 : simple_load<atomic_load_16>; +def atomic_load_simple_i32 : simple_load<atomic_load_32>; +def atomic_load_simple_i64 : simple_load<atomic_load_64>; + +class simple_store<PatFrag base> + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic; +}]>; + +def atomic_store_simple_i8 : simple_store<atomic_store_8>; +def atomic_store_simple_i16 : simple_store<atomic_store_16>; +def atomic_store_simple_i32 : simple_store<atomic_store_32>; +def atomic_store_simple_i64 : simple_store<atomic_store_64>; + +//===------------------------------ +// 2. UImm12 and SImm9 +//===------------------------------ + +// These instructions have two operands providing the address so they can be +// treated similarly for most purposes. + +//===------------------------------ +// 2.1 Base patterns covering extend/truncate semantics +//===------------------------------ + +// Atomic patterns can be shared between integer operations of all sizes, a +// quick multiclass here allows reuse. +multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag address, ValueType transty, + ValueType sty> { + def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address), + (LOAD Base, Offset)>; + + def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt), + (STORE $Rt, Base, Offset)>; +} + +// Instructions accessing a memory chunk smaller than a register (or, in a +// pinch, the same size) have a characteristic set of patterns they want to +// match: extending loads and truncating stores. This class deals with the +// sign-neutral version of those patterns. +// +// It will be instantiated across multiple addressing-modes. 
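The add_like_or fragment above rests on a simple identity: when the base has known zero low bits and the constant offset fits entirely inside them, OR and ADD compute the same address, which is what isBaseWithConstantOffset certifies. A tiny C++ check of that identity (illustrative only, not from the committed sources):

    // add_like_or.cpp : illustrative only, not part of the patch.
    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Base = 0x1000;                  // low bits known to be zero
      for (uint64_t Off = 0; Off < 8; ++Off)
        assert((Base | Off) == (Base + Off));  // OR behaves like ADD here
      return 0;
    }

With that in hand, ls_small_pats below layers the extending-load and truncating-store patterns that every addressing mode instantiates.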
+multiclass ls_small_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, + dag address, ValueType sty> + : ls_atomic_pats<LOAD, STORE, Base, Offset, address, i32, sty> { + def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>; + + def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>; + + // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit + // register was actually set. + def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; + + def : Pat<(i64 (!cast<SDNode>(extload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; + + def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address), + (STORE $Rt, Base, Offset)>; + + // For truncating store from 64-bits, we have to manually tell LLVM to + // ignore the high bits of the x register. + def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address), + (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>; +} + +// Next come patterns for sign-extending loads. +multiclass load_signed_pats<string T, string U, dag Base, dag Offset, + dag address, ValueType sty> { + def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)), + (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>; + + def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)), + (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>; + +} + +// and finally "natural-width" loads and stores come next. +multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag address, ValueType sty> { + def : Pat<(sty (load address)), (LOAD Base, Offset)>; + def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>; +} + +// Integer operations also get atomic instructions to select for. +multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag address, ValueType sty> + : ls_neutral_pats<LOAD, STORE, Base, Offset, address, sty>, + ls_atomic_pats<LOAD, STORE, Base, Offset, address, sty, sty>; + +//===------------------------------ +// 2.2. 
Addressing-mode instantiations +//===------------------------------ + +multiclass uimm12_pats<dag address, dag Base, dag Offset> { + defm : ls_small_pats<LS8_LDR, LS8_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, byte_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, byte_uimm12, + !subst(ALIGN, any_align, decls.pattern))), + i8>; + defm : ls_small_pats<LS16_LDR, LS16_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, hword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, hword_uimm12, + !subst(ALIGN, min_align2, decls.pattern))), + i16>; + defm : ls_small_pats<LS32_LDR, LS32_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern))), + i32>; + + defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern))), + i32>; + + defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, dword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, dword_uimm12, + !subst(ALIGN, min_align8, decls.pattern))), + i64>; + + defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, hword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, hword_uimm12, + !subst(ALIGN, min_align2, decls.pattern))), + f16>; + + defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern))), + f32>; + + defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, dword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, dword_uimm12, + !subst(ALIGN, min_align8, decls.pattern))), + f64>; + + defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, qword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, qword_uimm12, + !subst(ALIGN, min_align16, decls.pattern))), + f128>; + + defm : load_signed_pats<"B", "", Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, byte_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, byte_uimm12, + !subst(ALIGN, any_align, decls.pattern))), + i8>; + + defm : load_signed_pats<"H", "", Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, hword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, hword_uimm12, + !subst(ALIGN, min_align2, decls.pattern))), + i16>; + + def : Pat<(sextloadi32 !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern)))), + (LDRSWx Base, !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)))>; +} + +// Straightforward patterns of last resort: a pointer with or without an +// appropriate offset. 
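The byte/hword/word/dword/qword operand flavours and alignment predicates used above exist because the unsigned-immediate load/stores store offset divided by access size in a 12-bit field: a byte offset is only representable if it is a multiple of the access size and at most 4095 times that size. A small C++ sketch of the constraint, not from the committed sources:

    // uimm12_offsets.cpp : illustrative only, not part of the patch.
    #include <cassert>
    #include <cstdint>

    static bool fitsUImm12(uint64_t ByteOffset, unsigned Size) {
      return ByteOffset % Size == 0 && ByteOffset / Size <= 4095;
    }

    int main() {
      assert(fitsUImm12(32760, 8));    // ldr x0, [x1, #32760] : imm12 = 4095
      assert(!fitsUImm12(4, 8));       // not size-aligned for an 8-byte access
      assert(!fitsUImm12(32768, 8));   // one slot past the encodable range
      return 0;
    }

The instantiations that follow apply this to bare pointers, pointer-plus-offset, ADRP-anchored globals and frame indices.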
+defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>;
+defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12),
+ (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
+
+// The offset could be hidden behind an "or", of course:
+defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12),
+ (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
+
+// Global addresses under the small-absolute model should use these
+// instructions. There are ELF relocations specifically for it.
+defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
+ (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12,
+ ALIGN),
+ (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
+
+// External symbols that make it this far should also get standard relocations.
+defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12,
+ ALIGN),
+ (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
+ (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
+
+// We also want to use uimm12 instructions for local variables at the moment.
+def tframeindex_XFORM : SDNodeXForm<frameindex, [{
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->getTargetFrameIndex(FI, MVT::i64);
+}]>;
+
+defm : uimm12_pats<(i64 frameindex:$Rn),
+ (tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
+
+// These can be much simpler than uimm12 because we don't have to change the
+// operand type (e.g. LDURB and LDURH take the same operands).
+multiclass simm9_pats<dag address, dag Base, dag Offset> {
+ defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
+ defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
+
+ defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, i32>;
+ defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, f16>;
+ defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, f32>;
+ defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, f64>;
+ defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
+ f128>;
+
+ def : Pat<(i64 (zextloadi32 address)),
+ (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
+
+ def : Pat<(truncstorei32 i64:$Rt, address),
+ (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
+
+ defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
+ defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
+ def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
+}
+
+defm : simm9_pats<(add i64:$Rn, simm9:$SImm9),
+ (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9),
+ (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+
+//===------------------------------
+// 3. Register offset patterns
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes, a
+// quick multiclass here allows reuse.
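Taken together, the uimm12 and simm9 instantiations amount to a simple choice per access: use the scaled 12-bit unsigned form when the offset is size-aligned and in range, fall back to the unscaled 9-bit signed (LDUR/STUR-style) form for small or misaligned offsets, and otherwise materialise the offset into a register (compare the legality check in eliminateFrameIndex further down in this patch). A rough C++ model, not from the committed sources:

    // pick_addressing.cpp : illustrative only, not part of the patch.
    #include <cassert>
    #include <cstdint>

    enum class AddrForm { UImm12, SImm9, NeedsRegister };

    static AddrForm classify(int64_t Offset, unsigned Size) {
      if (Offset >= 0 && Offset % Size == 0 && Offset / Size <= 4095)
        return AddrForm::UImm12;
      if (Offset >= -256 && Offset <= 255)     // 9-bit signed, unscaled
        return AddrForm::SImm9;
      return AddrForm::NeedsRegister;
    }

    int main() {
      assert(classify(16, 8) == AddrForm::UImm12);
      assert(classify(-8, 8) == AddrForm::SImm9);   // negative: unscaled only
      assert(classify(3, 8)  == AddrForm::SImm9);   // misaligned for 8 bytes
      assert(classify(1 << 20, 8) == AddrForm::NeedsRegister);
      return 0;
    }

The register-offset forms handled next cover the remaining case, where the offset itself lives in a register.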
+multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag Extend, dag address, + ValueType transty, ValueType sty> { + def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address), + (LOAD Base, Offset, Extend)>; + + def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt), + (STORE $Rt, Base, Offset, Extend)>; +} + +// The register offset instructions take three operands giving the instruction, +// and have an annoying split between instructions where Rm is 32-bit and +// 64-bit. So we need a special hierarchy to describe them. Other than that the +// same operations should be supported as for simm9 and uimm12 addressing. + +multiclass ro_small_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, dag Extend, + dag address, ValueType sty> + : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, i32, sty> { + def : Pat<(!cast<SDNode>(zextload # sty) address), + (LOAD Base, Offset, Extend)>; + + def : Pat<(!cast<SDNode>(extload # sty) address), + (LOAD Base, Offset, Extend)>; + + // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit + // register was actually set. + def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; + + def : Pat<(i64 (!cast<SDNode>(extload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; + + def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address), + (STORE $Rt, Base, Offset, Extend)>; + + // For truncating store from 64-bits, we have to manually tell LLVM to + // ignore the high bits of the x register. + def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address), + (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>; + +} + +// Next come patterns for sign-extending loads. +multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend, + dag address, ValueType sty> { + def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)), + (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset") + Base, Offset, Extend)>; + + def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)), + (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset") + Base, Offset, Extend)>; +} + +// and finally "natural-width" loads and stores come next. 
+multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, dag Extend, dag address, + ValueType sty> { + def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>; + def : Pat<(store sty:$Rt, address), + (STORE $Rt, Base, Offset, Extend)>; +} + +multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, dag Extend, dag address, + ValueType sty> + : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, sty>, + ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, sty, sty>; + +multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset, + dag Extend> { + defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq0, decls.pattern)), + i8>; + defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + i16>; + defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + i32>; + + defm : ro_int_neutral_pats< + !cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + i32>; + + defm : ro_int_neutral_pats< + !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq3, decls.pattern)), + i64>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + f16>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + f32>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq3, decls.pattern)), + f64>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq4, decls.pattern)), + f128>; + + defm : ro_signed_pats<"B", Rm, Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq0, decls.pattern)), + i8>; + + defm : ro_signed_pats<"H", Rm, Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + i16>; + + def : Pat<(sextloadi32 !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern))), + (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset") + Base, Offset, Extend)>; +} + + +// Finally we're in a position to tell LLVM exactly what addresses are reachable +// using register-offset instructions. Essentially a base plus a possibly +// extended, possibly shifted (by access size) offset. 
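As the comment above says, the reachable register-offset addresses are a base plus a sign- or zero-extended 32-bit offset, or a 64-bit offset, shifted either by zero or by log2 of the access size (the SHIFT placeholder is substituted with imm_eq0..imm_eq4 per access width). A short C++ illustration, not from the committed sources:

    // regoff_address.cpp : illustrative only, not part of the patch.
    #include <cassert>
    #include <cstdint>

    static uint64_t regOffAddr(uint64_t Base, uint64_t ExtendedRm, unsigned Shift) {
      return Base + (ExtendedRm << Shift);
    }

    int main() {
      uint64_t Base = 0x10000;
      int32_t  Wm   = -2;                                   // sign-extended case
      // "ldr x0, [x1, w2, sxtw #3]"-style address for an 8-byte access:
      uint64_t A = regOffAddr(Base, uint64_t(int64_t(Wm)), 3);
      assert(A == Base - 16);
      // Unshifted 64-bit register offset, "ldr x0, [x1, x2]":
      assert(regOffAddr(Base, 32, 0) == Base + 32);
      return 0;
    }

The defm lines that follow enumerate exactly these shapes for Wm and Xm offsets.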
+ +defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)), + (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>; + +defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)), + (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>; + +defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)), + (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>; + +defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)), + (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>; + +defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm), + (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>; + +defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)), + (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp new file mode 100644 index 0000000..c96bf85 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -0,0 +1,140 @@ +//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower AArch64 MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "AArch64AsmPrinter.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +MCOperand +AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Sym) const { + const MCExpr *Expr = 0; + + Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext); + + switch (MO.getTargetFlags()) { + case AArch64II::MO_GOT: + Expr = AArch64MCExpr::CreateGOT(Expr, OutContext); + break; + case AArch64II::MO_GOT_LO12: + Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext); + break; + case AArch64II::MO_LO12: + Expr = AArch64MCExpr::CreateLo12(Expr, OutContext); + break; + case AArch64II::MO_DTPREL_G1: + Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext); + break; + case AArch64II::MO_DTPREL_G0_NC: + Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext); + break; + case AArch64II::MO_GOTTPREL: + Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext); + break; + case AArch64II::MO_GOTTPREL_LO12: + Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext); + break; + case AArch64II::MO_TLSDESC: + Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext); + break; + case AArch64II::MO_TLSDESC_LO12: + Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext); + break; + case AArch64II::MO_TPREL_G1: + Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext); + break; + case AArch64II::MO_TPREL_G0_NC: + Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext); + break; + case AArch64II::MO_NO_FLAG: + // Expr is already correct + break; + default: + llvm_unreachable("Unexpected MachineOperand flag"); + } + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(MO.getOffset(), + OutContext), + OutContext); + + return MCOperand::CreateExpr(Expr); +} + +bool 
AArch64AsmPrinter::lowerOperand(const MachineOperand &MO, + MCOperand &MCOp) const { + switch (MO.getType()) { + default: llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + if (MO.isImplicit()) + return false; + assert(!MO.getSubReg() && "Subregs should be eliminated!"); + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_BlockAddress: + MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress())); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName())); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal())); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), OutContext)); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex())); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex())); + break; + case MachineOperand::MO_RegisterMask: + // Ignore call clobbers + return false; + + } + + return true; +} + +void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, + MCInst &OutMI, + AArch64AsmPrinter &AP) { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + if (AP.lowerOperand(MO, MCOp)) + OutMI.addOperand(MCOp); + } +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp new file mode 100644 index 0000000..f45d8f7 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -0,0 +1,18 @@ +//===-- AArch64MachineFuctionInfo.cpp - AArch64 machine function info -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file just contains the anchor for the AArch64MachineFunctionInfo to +// force vtable emission. +// +//===----------------------------------------------------------------------===// +#include "AArch64MachineFunctionInfo.h" + +using namespace llvm; + +void AArch64MachineFunctionInfo::anchor() { } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h new file mode 100644 index 0000000..33da54f --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -0,0 +1,149 @@ +//=- AArch64MachineFuctionInfo.h - AArch64 machine function info -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares AArch64-specific per-machine-function information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AARCH64MACHINEFUNCTIONINFO_H +#define AARCH64MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// This class is derived from MachineFunctionInfo and contains private AArch64 +/// target-specific information for each MachineFunction. +class AArch64MachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + + /// Number of bytes of arguments this function has on the stack. If the callee + /// is expected to restore the argument stack this should be a multiple of 16, + /// all usable during a tail call. + /// + /// The alternative would forbid tail call optimisation in some cases: if we + /// want to transfer control from a function with 8-bytes of stack-argument + /// space to a function with 16-bytes then misalignment of this value would + /// make a stack adjustment necessary, which could not be undone by the + /// callee. + unsigned BytesInStackArgArea; + + /// The number of bytes to restore to deallocate space for incoming + /// arguments. Canonically 0 in the C calling convention, but non-zero when + /// callee is expected to pop the args. + unsigned ArgumentStackToRestore; + + /// If the stack needs to be adjusted on frame entry in two stages, this + /// records the size of the first adjustment just prior to storing + /// callee-saved registers. The callee-saved slots are addressed assuming + /// SP == <incoming-SP> - InitialStackAdjust. + unsigned InitialStackAdjust; + + /// Number of local-dynamic TLS accesses. + unsigned NumLocalDynamics; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of the area where LowerFormalArguments puts the + /// general-purpose registers that might contain variadic parameters. + int VariadicGPRIdx; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The size of the frame object used to store the general-purpose registers + /// which might contain variadic arguments. This is the offset from + /// VariadicGPRIdx to what's stored in __gr_top. + unsigned VariadicGPRSize; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of the area where LowerFormalArguments puts the + /// floating-point registers that might contain variadic parameters. + int VariadicFPRIdx; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The size of the frame object used to store the floating-point registers + /// which might contain variadic arguments. This is the offset from + /// VariadicFPRIdx to what's stored in __vr_top. + unsigned VariadicFPRSize; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of an object pointing just past the last known stacked + /// argument on entry to a variadic function. This goes into the __stack field + /// of the va_list type. + int VariadicStackIdx; + + /// The offset of the frame pointer from the stack pointer on function + /// entry. This is expected to be negative. 
+ int FramePointerOffset; + +public: + AArch64MachineFunctionInfo() + : BytesInStackArgArea(0), + ArgumentStackToRestore(0), + InitialStackAdjust(0), + NumLocalDynamics(0), + VariadicGPRIdx(0), + VariadicGPRSize(0), + VariadicFPRIdx(0), + VariadicFPRSize(0), + VariadicStackIdx(0), + FramePointerOffset(0) {} + + explicit AArch64MachineFunctionInfo(MachineFunction &MF) + : BytesInStackArgArea(0), + ArgumentStackToRestore(0), + InitialStackAdjust(0), + NumLocalDynamics(0), + VariadicGPRIdx(0), + VariadicGPRSize(0), + VariadicFPRIdx(0), + VariadicFPRSize(0), + VariadicStackIdx(0), + FramePointerOffset(0) {} + + unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; } + void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;} + + unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; } + void setArgumentStackToRestore(unsigned bytes) { + ArgumentStackToRestore = bytes; + } + + unsigned getInitialStackAdjust() const { return InitialStackAdjust; } + void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; } + + unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + + int getVariadicGPRIdx() const { return VariadicGPRIdx; } + void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; } + + unsigned getVariadicGPRSize() const { return VariadicGPRSize; } + void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; } + + int getVariadicFPRIdx() const { return VariadicFPRIdx; } + void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; } + + unsigned getVariadicFPRSize() const { return VariadicFPRSize; } + void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; } + + int getVariadicStackIdx() const { return VariadicStackIdx; } + void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; } + + int getFramePointerOffset() const { return FramePointerOffset; } + void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; } + +}; + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp new file mode 100644 index 0000000..20b0dcf --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -0,0 +1,171 @@ +//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the TargetRegisterInfo +// class. 
+// +//===----------------------------------------------------------------------===// + + +#include "AArch64RegisterInfo.h" +#include "AArch64FrameLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/ADT/BitVector.h" + +#define GET_REGINFO_TARGET_DESC +#include "AArch64GenRegisterInfo.inc" + +using namespace llvm; + +AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii, + const AArch64Subtarget &sti) + : AArch64GenRegisterInfo(AArch64::X30), TII(tii) { +} + +const uint16_t * +AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + return CSR_PCS_SaveList; +} + +const uint32_t* +AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const { + return CSR_PCS_RegMask; +} + +const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const { + return TLSDesc_RegMask; +} + +const TargetRegisterClass * +AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &AArch64::FlagClassRegClass) + return &AArch64::GPR64RegClass; + + return RC; +} + + + +BitVector +AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + Reserved.set(AArch64::XSP); + Reserved.set(AArch64::WSP); + + Reserved.set(AArch64::XZR); + Reserved.set(AArch64::WZR); + + if (TFI->hasFP(MF)) { + Reserved.set(AArch64::X29); + Reserved.set(AArch64::W29); + } + + return Reserved; +} + +void +AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, + int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet"); + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const AArch64FrameLowering *TFI = + static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering()); + + // In order to work out the base and offset for addressing, the FrameLowering + // code needs to know (sometimes) whether the instruction is storing/loading a + // callee-saved register, or whether it's a more generic + // operation. Fortunately the frame indices are used *only* for that purpose + // and are contiguous, so we can check here. + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI; + + unsigned FrameReg; + int64_t Offset; + Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj, + IsCalleeSaveOp); + + Offset += MI.getOperand(FIOperandNum + 1).getImm(); + + // DBG_VALUE instructions have no real restrictions so they can be handled + // easily. 
+ if (MI.isDebugValue()) { + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + return; + } + + int MinOffset, MaxOffset, OffsetScale; + if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) { + MinOffset = 0; + MaxOffset = 0xfff; + OffsetScale = 1; + } else { + // Load/store of a stack object + TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset); + } + + // The frame lowering has told us a base and offset it thinks we should use to + // access this variable, but it's still up to us to make sure the values are + // legal for the instruction in question. + if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) { + unsigned BaseReg = + MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII, + BaseReg, FrameReg, BaseReg, Offset); + FrameReg = BaseReg; + Offset = 0; + } + + // Negative offsets are expected if we address from FP, but for + // now this checks nothing has gone horribly wrong. + assert(Offset >= 0 && "Unexpected negative offset from SP"); + + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale); +} + +unsigned +AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (TFI->hasFP(MF)) + return AArch64::X29; + else + return AArch64::XSP; +} + +bool +AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const AArch64FrameLowering *AFI + = static_cast<const AArch64FrameLowering*>(TFI); + return AFI->useFPForAddressing(MF); +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h new file mode 100644 index 0000000..bb64fd5 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -0,0 +1,76 @@ +//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the MCRegisterInfo class. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
+#define LLVM_TARGET_AARCH64REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AArch64GenRegisterInfo.inc"
+
+namespace llvm {
+
+class AArch64InstrInfo;
+class AArch64Subtarget;
+
+struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
+private:
+  const AArch64InstrInfo &TII;
+
+public:
+  AArch64RegisterInfo(const AArch64InstrInfo &tii,
+                      const AArch64Subtarget &sti);
+
+  const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+  const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+
+  const uint32_t *getTLSDescCallPreservedMask() const;
+
+  BitVector getReservedRegs(const MachineFunction &MF) const;
+  unsigned getFrameRegister(const MachineFunction &MF) const;
+
+  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                           unsigned FIOperandNum,
+                           RegScavenger *Rs = NULL) const;
+
+  /// getCrossCopyRegClass - Returns a legal register class to copy a register
+  /// in the specified class to or from. Returns the original class if it is
+  /// possible to copy between two registers of the specified class.
+  const TargetRegisterClass *
+  getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+  /// getLargestLegalSuperClass - Returns the largest super class of RC that is
+  /// legal to use in the current sub-target and has the same spill size.
+  const TargetRegisterClass*
+  getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
+    if (RC == &AArch64::tcGPR64RegClass)
+      return &AArch64::GPR64RegClass;
+
+    return RC;
+  }
+
+  bool requiresRegisterScavenging(const MachineFunction &MF) const {
+    return true;
+  }
+
+  bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+    return true;
+  }
+
+  bool useFPForScavengingIndex(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_AARCH64REGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
new file mode 100644
index 0000000..bd79546
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -0,0 +1,203 @@
+//===- AArch64RegisterInfo.td - AArch64 Register defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations that describe the AArch64 register file.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AArch64" in {
+def sub_128 : SubRegIndex;
+def sub_64 : SubRegIndex;
+def sub_32 : SubRegIndex;
+def sub_16 : SubRegIndex;
+def sub_8 : SubRegIndex;
+
+// The VPR registers are handled as sub-registers of FPR equivalents, but
+// they're really the same thing. We give this concept a special index.
+def sub_alias : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
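Before the register definitions themselves, a short illustration of what the sub-register indices above buy: the 32-bit view of a 64-bit register can be looked up through the generic register-info interface instead of being hard-coded. This is only a sketch; it assumes the generated AArch64 enums from AArch64GenRegisterInfo.inc are visible (e.g. through the MCTargetDesc header), and the helper name is illustrative.

#include "MCTargetDesc/AArch64MCTargetDesc.h"   // generated register/sub-index enums
#include "llvm/Target/TargetRegisterInfo.h"

// sub_32 ties each W register to its X parent (both defined just below), so
// the narrow view of a wide register is a lookup: X0 -> W0, X29 -> W29, ...
static unsigned wViewOf(const llvm::TargetRegisterInfo &TRI, unsigned XReg) {
  return TRI.getSubReg(XReg, llvm::AArch64::sub_32);
}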
+class AArch64Reg<bits<16> enc, string n> : Register<n> { + let HWEncoding = enc; + let Namespace = "AArch64"; +} + +class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [], + list<SubRegIndex> inds = []> + : AArch64Reg<enc, n> { + let SubRegs = subregs; + let SubRegIndices = inds; +} + +//===----------------------------------------------------------------------===// +// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp +//===----------------------------------------------------------------------===// + +foreach Index = 0-30 in { + def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>; +} + +def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; +def WZR : AArch64Reg<31, "wzr">; + +// Could be combined with previous loop, but this way leaves w and x registers +// consecutive as LLVM register numbers, which makes for easier debugging. +foreach Index = 0-30 in { + def X#Index : AArch64RegWithSubs<Index, "x"#Index, + [!cast<Register>("W"#Index)], [sub_32]>, + DwarfRegNum<[Index]>; +} + +def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>; +def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>; + +// Most instructions treat register 31 as zero for reads and a black-hole for +// writes. + +// Note that the order of registers is important for the Disassembler here: +// tablegen uses it to form MCRegisterClass::getRegister, which we assume can +// take an encoding value. +def GPR32 : RegisterClass<"AArch64", [i32], 32, + (add (sequence "W%u", 0, 30), WZR)> { +} + +def GPR64 : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 30), XZR)> { +} + +def GPR32nowzr : RegisterClass<"AArch64", [i32], 32, + (sequence "W%u", 0, 30)> { +} + +def GPR64noxzr : RegisterClass<"AArch64", [i64], 64, + (sequence "X%u", 0, 30)> { +} + +// For tail calls, we can't use callee-saved registers or the structure-return +// register, as they are supposed to be live across function calls and may be +// clobbered by the epilogue. +def tcGPR64 : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 7), + (sequence "X%u", 9, 18))> { +} + + +// Certain addressing-useful instructions accept sp directly. Again the order of +// registers is important to the Disassembler. +def GPR32wsp : RegisterClass<"AArch64", [i32], 32, + (add (sequence "W%u", 0, 30), WSP)> { +} + +def GPR64xsp : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 30), XSP)> { +} + +// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and +// non-SP variants). 
We can't use a bare register in those patterns because +// TableGen doesn't like it, so we need a class containing just stack registers +def Rxsp : RegisterClass<"AArch64", [i64], 64, + (add XSP)> { +} + +def Rwsp : RegisterClass<"AArch64", [i32], 32, + (add WSP)> { +} + +//===----------------------------------------------------------------------===// +// Scalar registers in the vector unit: +// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31 +//===----------------------------------------------------------------------===// + +foreach Index = 0-31 in { + def B # Index : AArch64Reg< Index, "b" # Index>, + DwarfRegNum<[!add(Index, 64)]>; + + def H # Index : AArch64RegWithSubs<Index, "h" # Index, + [!cast<Register>("B" # Index)], [sub_8]>, + DwarfRegNum<[!add(Index, 64)]>; + + def S # Index : AArch64RegWithSubs<Index, "s" # Index, + [!cast<Register>("H" # Index)], [sub_16]>, + DwarfRegNum<[!add(Index, 64)]>; + + def D # Index : AArch64RegWithSubs<Index, "d" # Index, + [!cast<Register>("S" # Index)], [sub_32]>, + DwarfRegNum<[!add(Index, 64)]>; + + def Q # Index : AArch64RegWithSubs<Index, "q" # Index, + [!cast<Register>("D" # Index)], [sub_64]>, + DwarfRegNum<[!add(Index, 64)]>; +} + + +def FPR8 : RegisterClass<"AArch64", [i8], 8, + (sequence "B%u", 0, 31)> { +} + +def FPR16 : RegisterClass<"AArch64", [f16], 16, + (sequence "H%u", 0, 31)> { +} + +def FPR32 : RegisterClass<"AArch64", [f32], 32, + (sequence "S%u", 0, 31)> { +} + +def FPR64 : RegisterClass<"AArch64", [f64], 64, + (sequence "D%u", 0, 31)> { +} + +def FPR128 : RegisterClass<"AArch64", [f128], 128, + (sequence "Q%u", 0, 31)> { +} + + +//===----------------------------------------------------------------------===// +// Vector registers: +//===----------------------------------------------------------------------===// + +// NEON registers simply specify the overall vector, and it's expected that +// Instructions will individually specify the acceptable data layout. In +// principle this leaves two approaches open: +// + An operand, giving a single ADDvvv instruction (for example). This turns +// out to be unworkable in the assembly parser (without every Instruction +// having a "cvt" function, at least) because the constraints can't be +// properly enforced. It also complicates specifying patterns since each +// instruction will accept many types. +// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific +// details about NEON registers, but simplifies most other details. +// +// The second approach was taken. + +foreach Index = 0-31 in { + def V # Index : AArch64RegWithSubs<Index, "v" # Index, + [!cast<Register>("Q" # Index)], + [sub_alias]>, + DwarfRegNum<[!add(Index, 64)]>; +} + +// These two classes contain the same registers, which should be reasonably +// sensible for MC and allocation purposes, but allows them to be treated +// separately for things like stack spilling. 
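Because every class in this file lists its registers in encoding order, the disassembler can turn a 5-bit register field straight into a register via MCRegisterClass::getRegister, as the earlier comment notes, and the same will hold for the vector classes defined next. A rough sketch of that idiom, assuming the TableGen-generated AArch64MCRegisterClasses table and register-class IDs are in scope; the helper name is illustrative.

#include "MCTargetDesc/AArch64MCTargetDesc.h"  // generated enums and MC tables
#include "llvm/MC/MCRegisterInfo.h"

// The i-th register of a class is the one with hardware encoding i, so
// decoding is just an index: 0..30 -> X0..X30, 31 -> XZR for GPR64.
static unsigned decodeGPR64(unsigned RegFieldBits) {
  const llvm::MCRegisterClass &RC =
      llvm::AArch64MCRegisterClasses[llvm::AArch64::GPR64RegClassID];
  return RC.getRegister(RegFieldBits & 0x1f);
}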
+def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64, + (sequence "V%u", 0, 31)>; + +def VPR128 : RegisterClass<"AArch64", + [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128, + (sequence "V%u", 0, 31)>; + +// Flags register +def NZCV : Register<"nzcv"> { + let Namespace = "AArch64"; +} + +def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { + let CopyCost = -1; + let isAllocatable = 0; +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td b/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td new file mode 100644 index 0000000..e17cdaa --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td @@ -0,0 +1,10 @@ +//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def GenericItineraries : ProcessorItineraries<[], [], []>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp new file mode 100644 index 0000000..6bbe075 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -0,0 +1,25 @@ +//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AArch64SelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-selectiondag-info" +#include "AArch64TargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" +using namespace llvm; + +AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<AArch64Subtarget>()) { +} + +AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() { +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h new file mode 100644 index 0000000..d412ed2 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -0,0 +1,32 @@ +//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the AArch64 subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64SELECTIONDAGINFO_H +#define LLVM_AARCH64SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class AArch64TargetMachine; + +class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { + const AArch64Subtarget *Subtarget; +public: + explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM); + ~AArch64SelectionDAGInfo(); +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp new file mode 100644 index 0000000..d17b738 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -0,0 +1,43 @@ +//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AArch64 specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#include "AArch64Subtarget.h" +#include "AArch64RegisterInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallVector.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "AArch64GenSubtargetInfo.inc" + +using namespace llvm; + +AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS) + : AArch64GenSubtargetInfo(TT, CPU, FS) + , HasNEON(true) + , HasCrypto(true) + , TargetTriple(TT) { + + ParseSubtargetFeatures(CPU, FS); +} + +bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV, + Reloc::Model RelocM) const { + if (RelocM == Reloc::Static) + return false; + + return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility(); +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h new file mode 100644 index 0000000..2e9205f --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -0,0 +1,54 @@ +//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the AArch64 specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H +#define LLVM_TARGET_AARCH64_SUBTARGET_H + +#include "llvm/ADT/Triple.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "AArch64GenSubtargetInfo.inc" + +#include <string> + +namespace llvm { +class StringRef; +class GlobalValue; + +class AArch64Subtarget : public AArch64GenSubtargetInfo { +protected: + bool HasNEON; + bool HasCrypto; + + /// TargetTriple - What processor and OS we're targeting. + Triple TargetTriple; +public: + /// This constructor initializes the data members to match that + /// of the specified triple. 
+ /// + AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; + + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } + bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } + +}; +} // End llvm namespace + +#endif // LLVM_TARGET_AARCH64_SUBTARGET_H diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp new file mode 100644 index 0000000..df599d5 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -0,0 +1,81 @@ +//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the AArch64TargetMachine +// methods. Principally just setting up the passes needed to generate correct +// code on this architecture. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +extern "C" void LLVMInitializeAArch64Target() { + RegisterTargetMachine<AArch64TargetMachine> X(TheAArch64Target); +} + +AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS), + InstrInfo(Subtarget), + DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"), + TLInfo(*this), + TSInfo(*this), + FrameLowering(Subtarget) { +} + +namespace { +/// AArch64 Code Generator Pass Configuration Options. +class AArch64PassConfig : public TargetPassConfig { +public: + AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + AArch64TargetMachine &getAArch64TargetMachine() const { + return getTM<AArch64TargetMachine>(); + } + + const AArch64Subtarget &getAArch64Subtarget() const { + return *getAArch64TargetMachine().getSubtargetImpl(); + } + + virtual bool addInstSelector(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { + return new AArch64PassConfig(this, PM); +} + +bool AArch64PassConfig::addPreEmitPass() { + addPass(&UnpackMachineBundlesID); + addPass(createAArch64BranchFixupPass()); + return true; +} + +bool AArch64PassConfig::addInstSelector() { + addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel())); + + // For ELF, cleanup any local-dynamic TLS accesses. 
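For context, this is roughly how a client reaches the target machine registered above once LLVMInitializeAArch64Target (plus the usual TargetInfo and MC initializers) has run. It is only a sketch: the triple, CPU name and codegen options are example values, not anything this patch mandates.

#include <string>
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

// Look the target up by triple and build a TargetMachine from it.  Error
// handling is reduced to returning null.
static llvm::TargetMachine *makeAArch64TargetMachine() {
  std::string Err;
  const llvm::Target *T =
      llvm::TargetRegistry::lookupTarget("aarch64-none-linux-gnu", Err);
  if (!T)
    return 0;
  llvm::TargetOptions Opts;
  return T->createTargetMachine("aarch64-none-linux-gnu", "generic", "", Opts,
                                llvm::Reloc::PIC_, llvm::CodeModel::Default,
                                llvm::CodeGenOpt::Default);
}

The ELF-only local-dynamic TLS cleanup mentioned in the comment above is then added immediately below.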
+ if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) + addPass(createAArch64CleanupLocalDynamicTLSPass()); + + return false; +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h new file mode 100644 index 0000000..c1f47c2 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -0,0 +1,69 @@ +//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the AArch64 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64TARGETMACHINE_H +#define LLVM_AARCH64TARGETMACHINE_H + +#include "AArch64FrameLowering.h" +#include "AArch64ISelLowering.h" +#include "AArch64InstrInfo.h" +#include "AArch64SelectionDAGInfo.h" +#include "AArch64Subtarget.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class AArch64TargetMachine : public LLVMTargetMachine { + AArch64Subtarget Subtarget; + AArch64InstrInfo InstrInfo; + const DataLayout DL; + AArch64TargetLowering TLInfo; + AArch64SelectionDAGInfo TSInfo; + AArch64FrameLowering FrameLowering; + +public: + AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + const AArch64InstrInfo *getInstrInfo() const { + return &InstrInfo; + } + + const AArch64FrameLowering *getFrameLowering() const { + return &FrameLowering; + } + + const AArch64TargetLowering *getTargetLowering() const { + return &TLInfo; + } + + const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + + const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; } + + const DataLayout *getDataLayout() const { return &DL; } + + const TargetRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + TargetPassConfig *createPassConfig(PassManagerBase &PM); +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp new file mode 100644 index 0000000..b4452f5 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -0,0 +1,24 @@ +//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file deals with any AArch64 specific requirements on object files. 
+// +//===----------------------------------------------------------------------===// + + +#include "AArch64TargetObjectFile.h" + +using namespace llvm; + +void +AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h new file mode 100644 index 0000000..bf0565a --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -0,0 +1,31 @@ +//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file deals with any AArch64 specific requirements on object files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H +#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { + + /// AArch64LinuxTargetObjectFile - This implementation is used for linux + /// AArch64. + class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF { + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + }; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp new file mode 100644 index 0000000..69bb80a --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -0,0 +1,2197 @@ +//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the (GNU-style) assembly parser for the AArch64 +// architecture. 
+// +//===----------------------------------------------------------------------===// + + +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +namespace { + +class AArch64Operand; + +class AArch64AsmParser : public MCTargetAsmParser { + MCSubtargetInfo &STI; + MCAsmParser &Parser; + +#define GET_ASSEMBLER_HEADER +#include "AArch64GenAsmMatcher.inc" + +public: + enum AArch64MatchResultTy { + Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "AArch64GenAsmMatcher.inc" + }; + + AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + MCAsmParserExtension::Initialize(_Parser); + + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + // These are the public interface of the MCTargetAsmParser + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + bool ParseDirective(AsmToken DirectiveID); + bool ParseDirectiveTLSDescCall(SMLoc L); + bool ParseDirectiveWord(unsigned Size, SMLoc L); + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer&Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); + + // The rest of the sub-parsers have more freedom over interface: they return + // an OperandMatchResultTy because it's less ambiguous than true/false or + // -1/0/1 even if it is more verbose + OperandMatchResultTy + ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic); + + OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal); + + OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind); + + OperandMatchResultTy + ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + uint32_t NumLanes); + + OperandMatchResultTy + ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + uint32_t &NumLanes); + + OperandMatchResultTy + ParseImmWithLSLOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + OperandMatchResultTy + ParseCondCodeOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + OperandMatchResultTy + ParseCRxOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + OperandMatchResultTy + ParseFPImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + template<typename SomeNamedImmMapper> OperandMatchResultTy + ParseNamedImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return ParseNamedImmOperand(SomeNamedImmMapper(), Operands); + } + + OperandMatchResultTy + ParseNamedImmOperand(const NamedImmMapper &Mapper, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + OperandMatchResultTy + 
ParseLSXAddressOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + OperandMatchResultTy + ParseShiftExtend(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + OperandMatchResultTy + ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + bool validateInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + /// Scan the next token (which had better be an identifier) and determine + /// whether it represents a general-purpose or vector register. It returns + /// true if an identifier was found and populates its reference arguments. It + /// does not consume the token. + bool + IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec, + SMLoc &LayoutLoc) const; + +}; + +} + +namespace { + +/// Instances of this class represent a parsed AArch64 machine instruction. +class AArch64Operand : public MCParsedAsmOperand { +private: + enum KindTy { + k_ImmWithLSL, // #uimm {, LSL #amt } + k_CondCode, // eq/ne/... + k_FPImmediate, // Limited-precision floating-point imm + k_Immediate, // Including expressions referencing symbols + k_Register, + k_ShiftExtend, + k_SysReg, // The register operand of MRS and MSR instructions + k_Token, // The mnemonic; other raw tokens the auto-generated + k_WrappedRegister // Load/store exclusive permit a wrapped register. + } Kind; + + SMLoc StartLoc, EndLoc; + + struct ImmWithLSLOp { + const MCExpr *Val; + unsigned ShiftAmount; + bool ImplicitAmount; + }; + + struct CondCodeOp { + A64CC::CondCodes Code; + }; + + struct FPImmOp { + double Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ShiftExtendOp { + A64SE::ShiftExtSpecifiers ShiftType; + unsigned Amount; + bool ImplicitAmount; + }; + + struct SysRegOp { + const char *Data; + unsigned Length; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + + union { + struct ImmWithLSLOp ImmWithLSL; + struct CondCodeOp CondCode; + struct FPImmOp FPImm; + struct ImmOp Imm; + struct RegOp Reg; + struct ShiftExtendOp ShiftExtend; + struct SysRegOp SysReg; + struct TokOp Tok; + }; + + AArch64Operand(KindTy K, SMLoc S, SMLoc E) + : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {} + +public: + AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() { + } + + SMLoc getStartLoc() const { return StartLoc; } + SMLoc getEndLoc() const { return EndLoc; } + void print(raw_ostream&) const; + void dump() const; + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const { + assert((Kind == k_Register || Kind == k_WrappedRegister) + && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert(Kind == k_Immediate && "Invalid access!"); + return Imm.Val; + } + + A64CC::CondCodes getCondCode() const { + assert(Kind == k_CondCode && "Invalid access!"); + return CondCode.Code; + } + + static bool isNonConstantExpr(const MCExpr *E, + AArch64MCExpr::VariantKind &Variant) { + if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) { + Variant = A64E->getKind(); + return true; + } else if (!isa<MCConstantExpr>(E)) { + Variant = AArch64MCExpr::VK_AARCH64_None; + return true; + } + + return false; + } + + bool isCondCode() const { return Kind == k_CondCode; } + bool isToken() const { return Kind == k_Token; } + bool isReg() const { return Kind == k_Register; } + bool isImm() const { return Kind == k_Immediate; } + bool isMem() const { return false; } + bool 
isFPImm() const { return Kind == k_FPImmediate; } + bool isShiftOrExtend() const { return Kind == k_ShiftExtend; } + bool isSysReg() const { return Kind == k_SysReg; } + bool isImmWithLSL() const { return Kind == k_ImmWithLSL; } + bool isWrappedReg() const { return Kind == k_WrappedRegister; } + + bool isAddSubImmLSL0() const { + if (!isImmWithLSL()) return false; + if (ImmWithLSL.ShiftAmount != 0) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_LO12 + || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12 + || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC + || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12 + || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC + || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12; + } + + // Otherwise it should be a real immediate in range: + const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val); + return CE->getValue() >= 0 && CE->getValue() <= 0xfff; + } + + bool isAddSubImmLSL12() const { + if (!isImmWithLSL()) return false; + if (ImmWithLSL.ShiftAmount != 12) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12 + || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12; + } + + // Otherwise it should be a real immediate in range: + const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val); + return CE->getValue() >= 0 && CE->getValue() <= 0xfff; + } + + template<unsigned MemSize, unsigned RmSize> bool isAddrRegExtend() const { + if (!isShiftOrExtend()) return false; + + A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType; + if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW)) + return false; + + if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX)) + return false; + + return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0; + } + + bool isAdrpLabel() const { + if (!isImm()) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(getImm(), Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_None + || Variant == AArch64MCExpr::VK_AARCH64_GOT + || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL + || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC; + } + + return isLabel<21, 4096>(); + } + + template<unsigned RegWidth> bool isBitfieldWidth() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + return CE->getValue() >= 1 && CE->getValue() <= RegWidth; + } + + template<int RegWidth> + bool isCVTFixedPos() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + return CE->getValue() >= 1 && CE->getValue() <= RegWidth; + } + + bool isFMOVImm() const { + if (!isFPImm()) return false; + + APFloat RealVal(FPImm.Val); + uint32_t ImmVal; + return A64Imms::isFPImm(RealVal, ImmVal); + } + + bool isFPZero() const { + if (!isFPImm()) return false; + + APFloat RealVal(FPImm.Val); + return RealVal.isPosZero(); + } + + template<unsigned field_width, unsigned scale> + bool isLabel() const { + if (!isImm()) return false; + + if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) { + return true; + } else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Val = CE->getValue(); + int64_t Min = - (scale * (1LL << (field_width - 1))); + int64_t Max = scale * ((1LL << (field_width - 1)) - 1); + return (Val % scale) == 0 
&& Val >= Min && Val <= Max; + } + + // N.b. this disallows explicit relocation specifications via an + // AArch64MCExpr. Users needing that behaviour + return false; + } + + bool isLane1() const { + if (!isImm()) return false; + + // Because it's come through custom assembly parsing, it must always be a + // constant expression. + return cast<MCConstantExpr>(getImm())->getValue() == 1; + } + + bool isLoadLitLabel() const { + if (!isImm()) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(getImm(), Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_None + || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL; + } + + return isLabel<19, 4>(); + } + + template<unsigned RegWidth> bool isLogicalImm() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val); + if (!CE) return false; + + uint32_t Bits; + return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); + } + + template<unsigned RegWidth> bool isLogicalImmMOV() const { + if (!isLogicalImm<RegWidth>()) return false; + + const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val); + + // The move alias for ORR is only valid if the immediate cannot be + // represented with a move (immediate) instruction; they take priority. + int UImm16, Shift; + return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift) + && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift); + } + + template<int MemSize> + bool isOffsetUImm12() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + + // Assume they know what they're doing for now if they've given us a + // non-constant expression. In principle we could check for ridiculous + // things that can't possibly work or relocations that would almost + // certainly break resulting code. + if (!CE) + return true; + + int64_t Val = CE->getValue(); + + // Must be a multiple of the access size in bytes. + if ((Val & (MemSize - 1)) != 0) return false; + + // Must be 12-bit unsigned + return Val >= 0 && Val <= 0xfff * MemSize; + } + + template<A64SE::ShiftExtSpecifiers SHKind, bool is64Bit> + bool isShift() const { + if (!isShiftOrExtend()) return false; + + if (ShiftExtend.ShiftType != SHKind) + return false; + + return is64Bit ? 
ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31; + } + + bool isMOVN32Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(32, PermittedModifiers, NumModifiers); + } + + bool isMOVN64Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_SABS_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G2, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(64, PermittedModifiers, NumModifiers); + } + + + bool isMOVZ32Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0, + AArch64MCExpr::VK_AARCH64_ABS_G1, + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(32, PermittedModifiers, NumModifiers); + } + + bool isMOVZ64Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0, + AArch64MCExpr::VK_AARCH64_ABS_G1, + AArch64MCExpr::VK_AARCH64_ABS_G2, + AArch64MCExpr::VK_AARCH64_ABS_G3, + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_SABS_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G2, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(64, PermittedModifiers, NumModifiers); + } + + bool isMOVK32Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0_NC, + AArch64MCExpr::VK_AARCH64_ABS_G1_NC, + AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(32, PermittedModifiers, NumModifiers); + } + + bool isMOVK64Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0_NC, + AArch64MCExpr::VK_AARCH64_ABS_G1_NC, + AArch64MCExpr::VK_AARCH64_ABS_G2_NC, + AArch64MCExpr::VK_AARCH64_ABS_G3, + AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, + }; + unsigned NumModifiers = 
llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(64, PermittedModifiers, NumModifiers); + } + + bool isMoveWideImm(unsigned RegWidth, + AArch64MCExpr::VariantKind *PermittedModifiers, + unsigned NumModifiers) const { + if (!isImmWithLSL()) return false; + + if (ImmWithLSL.ShiftAmount % 16 != 0) return false; + if (ImmWithLSL.ShiftAmount >= RegWidth) return false; + + AArch64MCExpr::VariantKind Modifier; + if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) { + // E.g. "#:abs_g0:sym, lsl #16" makes no sense. + if (!ImmWithLSL.ImplicitAmount) return false; + + for (unsigned i = 0; i < NumModifiers; ++i) + if (PermittedModifiers[i] == Modifier) return true; + + return false; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmWithLSL.Val); + return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff; + } + + template<int RegWidth, bool (*isValidImm)(int, uint64_t, int&, int&)> + bool isMoveWideMovAlias() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + int UImm16, Shift; + uint64_t Value = CE->getValue(); + + // If this is a 32-bit instruction then all bits above 32 should be the + // same: either of these is fine because signed/unsigned values should be + // permitted. + if (RegWidth == 32) { + if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff) + return false; + + Value &= 0xffffffffULL; + } + + return isValidImm(RegWidth, Value, UImm16, Shift); + } + + bool isMSRWithReg() const { + if (!isSysReg()) return false; + + bool IsKnownRegister; + StringRef Name(SysReg.Data, SysReg.Length); + A64SysReg::MSRMapper().fromString(Name, IsKnownRegister); + + return IsKnownRegister; + } + + bool isMSRPState() const { + if (!isSysReg()) return false; + + bool IsKnownRegister; + StringRef Name(SysReg.Data, SysReg.Length); + A64PState::PStateMapper().fromString(Name, IsKnownRegister); + + return IsKnownRegister; + } + + bool isMRS() const { + if (!isSysReg()) return false; + + // First check against specific MSR-only (write-only) registers + bool IsKnownRegister; + StringRef Name(SysReg.Data, SysReg.Length); + A64SysReg::MRSMapper().fromString(Name, IsKnownRegister); + + return IsKnownRegister; + } + + bool isPRFM() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + + if (!CE) + return false; + + return CE->getValue() >= 0 && CE->getValue() <= 31; + } + + template<A64SE::ShiftExtSpecifiers SHKind> bool isRegExtend() const { + if (!isShiftOrExtend()) return false; + + if (ShiftExtend.ShiftType != SHKind) + return false; + + return ShiftExtend.Amount <= 4; + } + + bool isRegExtendLSL() const { + if (!isShiftOrExtend()) return false; + + if (ShiftExtend.ShiftType != A64SE::LSL) + return false; + + return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4; + } + + template<int MemSize> bool isSImm7Scaled() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + int64_t Val = CE->getValue(); + if (Val % MemSize != 0) return false; + + Val /= MemSize; + + return Val >= -64 && Val < 64; + } + + template<int BitWidth> + bool isSImm() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + return CE->getValue() >= -(1LL << (BitWidth - 1)) + && CE->getValue() < (1LL << (BitWidth - 1)); + } + + template<int bitWidth> + bool isUImm() const { + if (!isImm()) return false; + + const 
MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth); + } + + bool isUImm() const { + if (!isImm()) return false; + + return isa<MCConstantExpr>(getImm()); + } + + static AArch64Operand *CreateImmWithLSL(const MCExpr *Val, + unsigned ShiftAmount, + bool ImplicitAmount, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E); + Op->ImmWithLSL.Val = Val; + Op->ImmWithLSL.ShiftAmount = ShiftAmount; + Op->ImmWithLSL.ImplicitAmount = ImplicitAmount; + return Op; + } + + static AArch64Operand *CreateCondCode(A64CC::CondCodes Code, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E); + Op->CondCode.Code = Code; + return Op; + } + + static AArch64Operand *CreateFPImm(double Val, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E); + Op->FPImm.Val = Val; + return Op; + } + + static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E); + Op->Imm.Val = Val; + return Op; + } + + static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_Register, S, E); + Op->Reg.RegNum = RegNum; + return Op; + } + + static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E); + Op->Reg.RegNum = RegNum; + return Op; + } + + static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp, + unsigned Amount, + bool ImplicitAmount, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E); + Op->ShiftExtend.ShiftType = ShiftTyp; + Op->ShiftExtend.Amount = Amount; + Op->ShiftExtend.ImplicitAmount = ImplicitAmount; + return Op; + } + + static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) { + AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + return Op; + } + + static AArch64Operand *CreateToken(StringRef Str, SMLoc S) { + AArch64Operand *Op = new AArch64Operand(k_Token, S, S); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + return Op; + } + + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. 
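As a usage sketch of the factory functions above (a hypothetical fragment, not code from this patch), a parser routine that has just read an immediate and an explicit shift would package them like this before handing the operand list to the matcher:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/SMLoc.h"

// Wrap a constant and an explicit "lsl #Shift" into a single parsed operand.
static AArch64Operand *makeShiftedImm(int64_t Imm, unsigned Shift,
                                      llvm::MCContext &Ctx,
                                      llvm::SMLoc S, llvm::SMLoc E) {
  const llvm::MCExpr *Val = llvm::MCConstantExpr::Create(Imm, Ctx);
  // The shift was written out explicitly, so ImplicitAmount is false.
  return AArch64Operand::CreateImmWithLSL(Val, Shift, /*ImplicitAmount=*/false,
                                          S, E);
}

addExpr below then folds any such constant down to a plain immediate operand, keeping symbolic expressions around for later fixups, as the comment above says.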
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + + template<unsigned RegWidth> + void addBFILSBOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth; + Inst.addOperand(MCOperand::CreateImm(EncodedVal)); + } + + void addBFIWidthOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); + } + + void addBFXWidthOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm(); + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + + Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1)); + } + + void addCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getCondCode())); + } + + void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue())); + } + + void addFMOVImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + APFloat RealVal(FPImm.Val); + uint32_t ImmVal; + A64Imms::isFPImm(RealVal, ImmVal); + + Inst.addOperand(MCOperand::CreateImm(ImmVal)); + } + + void addFPZeroOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + Inst.addOperand(MCOperand::CreateImm(0)); + } + + void addInvCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + unsigned Encoded = A64InvertCondCode(getCondCode()); + Inst.addOperand(MCOperand::CreateImm(Encoded)); + } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + template<int MemSize> + void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + uint64_t Val = CE->getValue() / MemSize; + Inst.addOperand(MCOperand::CreateImm(Val & 0x7f)); + } + + template<int BitWidth> + void addSImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + uint64_t Val = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1))); + } + + void addImmWithLSLOperands(MCInst &Inst, unsigned N) const { + assert (N == 1 && "Invalid number of operands!"); + + addExpr(Inst, ImmWithLSL.Val); + } + + template<unsigned field_width, unsigned scale> + void addLabelOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val); + + if (!CE) { + addExpr(Inst, Imm.Val); + return; + } + + int64_t Val = CE->getValue(); + assert(Val % scale == 0 && "Unaligned 
immediate in instruction"); + Val /= scale; + + Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1))); + } + + template<int MemSize> + void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { + Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize)); + } else { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + } + } + + template<unsigned RegWidth> + void addLogicalImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val); + + uint32_t Bits; + A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMRSOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMSRWithRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMSRPStateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMoveWideImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + addExpr(Inst, ImmWithLSL.Val); + + AArch64MCExpr::VariantKind Variant; + if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) { + Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16)); + return; + } + + // We know it's relocated + switch (Variant) { + case AArch64MCExpr::VK_AARCH64_ABS_G0: + case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G0: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: + Inst.addOperand(MCOperand::CreateImm(0)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G1: + case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: + Inst.addOperand(MCOperand::CreateImm(1)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G2: + case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + Inst.addOperand(MCOperand::CreateImm(2)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G3: + Inst.addOperand(MCOperand::CreateImm(3)); + break; + default: llvm_unreachable("Inappropriate move wide relocation"); + } + } + + template<int RegWidth, bool isValidImm(int, uint64_t, int&, int&)> + void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int UImm16, Shift; + + const 
MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + uint64_t Value = CE->getValue(); + + if (RegWidth == 32) { + Value &= 0xffffffffULL; + } + + bool Valid = isValidImm(RegWidth, Value, UImm16, Shift); + (void)Valid; + assert(Valid && "Invalid immediates should have been weeded out by now"); + + Inst.addOperand(MCOperand::CreateImm(UImm16)); + Inst.addOperand(MCOperand::CreateImm(Shift)); + } + + void addPRFMOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + assert(CE->getValue() >= 0 && CE->getValue() <= 31 + && "PRFM operand should be 5-bits"); + + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + } + + // For Add-sub (extended register) operands. + void addRegExtendOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); + } + + // For the extend in load-store (register offset) instructions. + template<unsigned MemSize> + void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const { + addAddrRegExtendOperands(Inst, N, MemSize); + } + + void addAddrRegExtendOperands(MCInst &Inst, unsigned N, + unsigned MemSize) const { + assert(N == 1 && "Invalid number of operands!"); + + // First bit of Option is set in instruction classes, the high two bits are + // as follows: + unsigned OptionHi = 0; + switch (ShiftExtend.ShiftType) { + case A64SE::UXTW: + case A64SE::LSL: + OptionHi = 1; + break; + case A64SE::SXTW: + case A64SE::SXTX: + OptionHi = 3; + break; + default: + llvm_unreachable("Invalid extend type for register offset"); + } + + unsigned S = 0; + if (MemSize == 1 && !ShiftExtend.ImplicitAmount) + S = 1; + else if (MemSize != 1 && ShiftExtend.Amount != 0) + S = 1; + + Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S)); + } + void addShiftOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); + } +}; + +} // end anonymous namespace. + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic) { + + // See if the operand has a custom parser + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + + // It could either succeed, fail or just not care. + if (ResTy != MatchOperand_NoMatch) + return ResTy; + + switch (getLexer().getKind()) { + default: + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return MatchOperand_ParseFail; + case AsmToken::Identifier: { + // It might be in the LSL/UXTB family ... + OperandMatchResultTy GotShift = ParseShiftExtend(Operands); + + // We can only continue if no tokens were eaten. + if (GotShift != MatchOperand_NoMatch) + return GotShift; + + // ... or it might be a register ... + uint32_t NumLanes = 0; + OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes); + assert(GotReg != MatchOperand_ParseFail + && "register parsing shouldn't partially succeed"); + + if (GotReg == MatchOperand_Success) { + if (Parser.getTok().is(AsmToken::LBrac)) + return ParseNEONLane(Operands, NumLanes); + else + return MatchOperand_Success; + } + + // ... or it might be a symbolish thing + } + // Fall through + case AsmToken::LParen: // E.g. (strcmp-4) + case AsmToken::Integer: // 1f, 2b labels + case AsmToken::String: // quoted labels + case AsmToken::Dot: // . 
is Current location + case AsmToken::Dollar: // $ is PC + case AsmToken::Colon: { + SMLoc StartLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + const MCExpr *ImmVal = 0; + + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); + return MatchOperand_Success; + } + case AsmToken::Hash: { // Immediates + SMLoc StartLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + const MCExpr *ImmVal = 0; + Parser.Lex(); + + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); + return MatchOperand_Success; + } + case AsmToken::LBrac: { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("[", Loc)); + Parser.Lex(); // Eat '[' + + // There's no comma after a '[', so we can parse the next operand + // immediately. + return ParseOperand(Operands, Mnemonic); + } + // The following will likely be useful later, but not in very early cases + case AsmToken::LCurly: // Weird SIMD lists + llvm_unreachable("Don't know how to deal with '{' in operand"); + return MatchOperand_ParseFail; + } +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) { + if (getLexer().is(AsmToken::Colon)) { + AArch64MCExpr::VariantKind RefKind; + + OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind); + if (ResTy != MatchOperand_Success) + return ResTy; + + const MCExpr *SubExprVal; + if (getParser().parseExpression(SubExprVal)) + return MatchOperand_ParseFail; + + ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext()); + return MatchOperand_Success; + } + + // No weird AArch64MCExpr prefix + return getParser().parseExpression(ExprVal) + ? MatchOperand_ParseFail : MatchOperand_Success; +} + +// A lane attached to a NEON register. "[N]", which should yield three tokens: +// '[', N, ']'. A hash is not allowed to precede the immediate here. 
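The lane index parsed by ParseNEONLane below is validated against the lane count implied by the register's layout specifier, so a register written with a ".4s" layout accepts only lane indices 0-3. A minimal sketch of that layout-to-lane-count mapping, using a hypothetical helper that is not present in the patch:

#include "llvm/ADT/StringRef.h"

// ".16b" -> 16, ".8h" -> 8, ".4s" -> 4, ".2d" -> 2; returns 0 on malformed
// input.  ParseNEONLane only needs to compare the parsed index against this.
static unsigned lanesForLayout(llvm::StringRef Spec) {
  unsigned Lanes = 0;
  if (Spec.size() < 3 || Spec.front() != '.' ||
      Spec.drop_front().drop_back().getAsInteger(10, Lanes))
    return 0;
  return Lanes;
}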
+AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + uint32_t NumLanes) { + SMLoc Loc = Parser.getTok().getLoc(); + + assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand"); + Operands.push_back(AArch64Operand::CreateToken("[", Loc)); + Parser.Lex(); // Eat '[' + + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), "expected lane number"); + return MatchOperand_ParseFail; + } + + if (Parser.getTok().getIntVal() >= NumLanes) { + Error(Parser.getTok().getLoc(), "lane number incompatible with layout"); + return MatchOperand_ParseFail; + } + + const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(), + getContext()); + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat actual lane + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImm(Lane, S, E)); + + + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(Parser.getTok().getLoc(), "expected ']' after lane"); + return MatchOperand_ParseFail; + } + + Operands.push_back(AArch64Operand::CreateToken("]", Loc)); + Parser.Lex(); // Eat ']' + + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) { + assert(getLexer().is(AsmToken::Colon) && "expected a ':'"); + Parser.Lex(); + + if (getLexer().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), + "expected relocation specifier in operand after ':'"); + return MatchOperand_ParseFail; + } + + std::string LowerCase = Parser.getTok().getIdentifier().lower(); + RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase) + .Case("got", AArch64MCExpr::VK_AARCH64_GOT) + .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12) + .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12) + .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0) + .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC) + .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1) + .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC) + .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2) + .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC) + .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3) + .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0) + .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1) + .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2) + .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2) + .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1) + .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC) + .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0) + .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC) + .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12) + .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12) + .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC) + .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1) + .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC) + .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL) + .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12) + .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2) + .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1) + .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC) + .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0) + .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC) + .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12) + .Case("tprel_lo12", 
AArch64MCExpr::VK_AARCH64_TPREL_LO12) + .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC) + .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC) + .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12) + .Default(AArch64MCExpr::VK_AARCH64_None); + + if (RefKind == AArch64MCExpr::VK_AARCH64_None) { + Error(Parser.getTok().getLoc(), + "expected relocation specifier in operand after ':'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat identifier + + if (getLexer().isNot(AsmToken::Colon)) { + Error(Parser.getTok().getLoc(), + "expected ':' after relocation specifier"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseImmWithLSLOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // FIXME?: I want to live in a world where immediates must start with + // #. Please don't dash my hopes (well, do if you have a good reason). + if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat '#' + + const MCExpr *Imm; + if (ParseImmediate(Imm) != MatchOperand_Success) + return MatchOperand_ParseFail; + else if (Parser.getTok().isNot(AsmToken::Comma)) { + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E)); + return MatchOperand_Success; + } + + // Eat ',' + Parser.Lex(); + + // The optional operand must be "lsl #N" where N is non-negative. + if (Parser.getTok().is(AsmToken::Identifier) + && Parser.getTok().getIdentifier().lower() == "lsl") { + Parser.Lex(); + + if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate"); + return MatchOperand_ParseFail; + } + } + } + + int64_t ShiftAmount = Parser.getTok().getIntVal(); + + if (ShiftAmount < 0) { + Error(Parser.getTok().getLoc(), "positive shift amount required"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat the number + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount, + false, S, E)); + return MatchOperand_Success; +} + + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseCondCodeOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + if (Parser.getTok().isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + + StringRef Tok = Parser.getTok().getIdentifier(); + A64CC::CondCodes CondCode = A64StringToCondCode(Tok); + + if (CondCode == A64CC::Invalid) + return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat condition code + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseCRxOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(S, "Expected cN operand where 0 <= N <= 15"); + return MatchOperand_ParseFail; + } + + std::string LowerTok = Parser.getTok().getIdentifier().lower(); + StringRef Tok(LowerTok); + if (Tok[0] != 'c') { + Error(S, "Expected cN operand where 0 <= N <= 15"); + return MatchOperand_ParseFail; + } + + uint32_t CRNum; + bool BadNum = Tok.drop_front().getAsInteger(10, CRNum); + if (BadNum || CRNum > 15) { + Error(S, "Expected cN operand where 0 <= 
N <= 15"); + return MatchOperand_ParseFail; + } + + const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext()); + + Parser.Lex(); + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseFPImmOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + + // FIXME?: I want to live in a world where immediates must start with + // #. Please don't dash my hopes (well, do if you have a good reason). + if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat '#' + + bool Negative = false; + if (Parser.getTok().is(AsmToken::Minus)) { + Negative = true; + Parser.Lex(); // Eat '-' + } else if (Parser.getTok().is(AsmToken::Plus)) { + Parser.Lex(); // Eat '+' + } + + if (Parser.getTok().isNot(AsmToken::Real)) { + Error(S, "Expected floating-point immediate"); + return MatchOperand_ParseFail; + } + + APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString()); + if (Negative) RealVal.changeSign(); + double DblVal = RealVal.convertToDouble(); + + Parser.Lex(); // Eat real number + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E)); + return MatchOperand_Success; +} + + +// Automatically generated +static unsigned MatchRegisterName(StringRef Name); + +bool +AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, + StringRef &Layout, + SMLoc &LayoutLoc) const { + const AsmToken &Tok = Parser.getTok(); + + if (Tok.isNot(AsmToken::Identifier)) + return false; + + std::string LowerReg = Tok.getString().lower(); + size_t DotPos = LowerReg.find('.'); + + RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); + if (RegNum == AArch64::NoRegister) { + RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos)) + .Case("ip0", AArch64::X16) + .Case("ip1", AArch64::X17) + .Case("fp", AArch64::X29) + .Case("lr", AArch64::X30) + .Default(AArch64::NoRegister); + } + if (RegNum == AArch64::NoRegister) + return false; + + SMLoc S = Tok.getLoc(); + RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); + + if (DotPos == StringRef::npos) { + Layout = StringRef(); + } else { + // Everything afterwards needs to be a literal token, expected to be + // '.2d','.b' etc for vector registers. + + // This StringSwitch validates the input and (perhaps more importantly) + // gives us a permanent string to use in the token (a pointer into LowerReg + // would go out of scope when we return). 
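+    // For example, a ".2d" suffix yields the literal ".2d" token, whereas an
+    // unknown suffix such as ".3h" falls through to the empty default and the
+    // register is treated as malformed below.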
+ LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1); + std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos); + Layout = StringSwitch<const char *>(LayoutText) + .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d") + .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s") + .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h") + .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b") + .Default(""); + + if (Layout.size() == 0) { + // Malformed register + return false; + } + } + + return true; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + uint32_t &NumLanes) { + unsigned RegNum; + StringRef Layout; + SMLoc RegEndLoc, LayoutLoc; + SMLoc S = Parser.getTok().getLoc(); + + if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)) + return MatchOperand_NoMatch; + + Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc)); + + if (Layout.size() != 0) { + unsigned long long TmpLanes = 0; + llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes); + if (TmpLanes != 0) { + NumLanes = TmpLanes; + } else { + // If the number of lanes isn't specified explicitly, a valid instruction + // will have an element specifier and be capable of acting on the entire + // vector register. + switch (Layout.back()) { + default: llvm_unreachable("Invalid layout specifier"); + case 'b': NumLanes = 16; break; + case 'h': NumLanes = 8; break; + case 's': NumLanes = 4; break; + case 'd': NumLanes = 2; break; + } + } + + Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc)); + } + + Parser.Lex(); + return MatchOperand_Success; +} + +bool +AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + // This callback is used for things like DWARF frame directives in + // assembly. They don't care about things like NEON layouts or lanes, they + // just want to be able to produce the DWARF register number. + StringRef LayoutSpec; + SMLoc RegEndLoc, LayoutLoc; + StartLoc = Parser.getTok().getLoc(); + + if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc)) + return true; + + Parser.Lex(); + EndLoc = Parser.getTok().getLoc(); + + return false; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Since these operands occur in very limited circumstances, without + // alternatives, we actually signal an error if there is no match. If relaxing + // this, beware of unintended consequences: an immediate will be accepted + // during matching, no matter how it gets into the AArch64Operand. + const AsmToken &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + + if (Tok.is(AsmToken::Identifier)) { + bool ValidName; + uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName); + + if (!ValidName) { + Error(S, "operand specifier not recognised"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // We're done with the identifier. 
Eat it + + SMLoc E = Parser.getTok().getLoc(); + const MCExpr *Imm = MCConstantExpr::Create(Code, getContext()); + Operands.push_back(AArch64Operand::CreateImm(Imm, S, E)); + return MatchOperand_Success; + } else if (Tok.is(AsmToken::Hash)) { + Parser.Lex(); + + const MCExpr *ImmVal; + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); + if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) { + Error(S, "Invalid immediate for instruction"); + return MatchOperand_ParseFail; + } + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E)); + return MatchOperand_Success; + } + + Error(S, "unexpected operand for instruction"); + return MatchOperand_ParseFail; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseSysRegOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + const AsmToken &Tok = Parser.getTok(); + + // Any MSR/MRS operand will be an identifier, and we want to store it as some + // kind of string: SPSel is valid for two different forms of MSR with two + // different encodings. There's no collision at the moment, but the potential + // is there. + if (!Tok.is(AsmToken::Identifier)) { + return MatchOperand_NoMatch; + } + + SMLoc S = Tok.getLoc(); + Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S)); + Parser.Lex(); // Eat identifier + + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseLSXAddressOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + + unsigned RegNum; + SMLoc RegEndLoc, LayoutLoc; + StringRef Layout; + if(!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc) + || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum) + || Layout.size() != 0) { + // Check Layout.size because we don't want to let "x3.4s" or similar + // through. 
+ return MatchOperand_NoMatch; + } + Parser.Lex(); // Eat register + + if (Parser.getTok().is(AsmToken::RBrac)) { + // We're done + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); + return MatchOperand_Success; + } + + // Otherwise, only ", #0" is valid + + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "expected ',' or ']' after register"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat ',' + + if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "expected '#0'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat '#' + + if (Parser.getTok().isNot(AsmToken::Integer) + || Parser.getTok().getIntVal() != 0 ) { + Error(Parser.getTok().getLoc(), "expected '#0'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat '0' + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseShiftExtend( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + StringRef IDVal = Parser.getTok().getIdentifier(); + std::string LowerID = IDVal.lower(); + + A64SE::ShiftExtSpecifiers Spec = + StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID) + .Case("lsl", A64SE::LSL) + .Case("lsr", A64SE::LSR) + .Case("asr", A64SE::ASR) + .Case("ror", A64SE::ROR) + .Case("uxtb", A64SE::UXTB) + .Case("uxth", A64SE::UXTH) + .Case("uxtw", A64SE::UXTW) + .Case("uxtx", A64SE::UXTX) + .Case("sxtb", A64SE::SXTB) + .Case("sxth", A64SE::SXTH) + .Case("sxtw", A64SE::SXTW) + .Case("sxtx", A64SE::SXTX) + .Default(A64SE::Invalid); + + if (Spec == A64SE::Invalid) + return MatchOperand_NoMatch; + + // Eat the shift + SMLoc S, E; + S = Parser.getTok().getLoc(); + Parser.Lex(); + + if (Spec != A64SE::LSL && Spec != A64SE::LSR && + Spec != A64SE::ASR && Spec != A64SE::ROR) { + // The shift amount can be omitted for the extending versions, but not real + // shifts: + // add x0, x0, x0, uxtb + // is valid, and equivalent to + // add x0, x0, x0, uxtb #0 + + if (Parser.getTok().is(AsmToken::Comma) || + Parser.getTok().is(AsmToken::EndOfStatement) || + Parser.getTok().is(AsmToken::RBrac)) { + Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true, + S, E)); + return MatchOperand_Success; + } + } + + // Eat # at beginning of immediate + if (!Parser.getTok().is(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), + "expected #imm after shift specifier"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + + // Make sure we do actually have a number + if (!Parser.getTok().is(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), + "expected integer shift amount"); + return MatchOperand_ParseFail; + } + unsigned Amount = Parser.getTok().getIntVal(); + Parser.Lex(); + E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false, + S, E)); + + return MatchOperand_Success; +} + +// FIXME: We would really like to be able to tablegen'erate this. 
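+// Roughly, the checks below reject bitfield inserts whose lsb + width would
+// run past the top of the register (e.g. "bfi x0, x1, #60, #8" gives
+// "requested insert overflows register"), apply the analogous range check to
+// the extract forms, and make sure IC/TLBI operations are only given a
+// register operand when the named operation actually takes one.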
+bool AArch64AsmParser:: +validateInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + switch (Inst.getOpcode()) { + case AArch64::BFIwwii: + case AArch64::BFIxxii: + case AArch64::SBFIZwwii: + case AArch64::SBFIZxxii: + case AArch64::UBFIZwwii: + case AArch64::UBFIZxxii: { + unsigned ImmOps = Inst.getNumOperands() - 2; + int64_t ImmR = Inst.getOperand(ImmOps).getImm(); + int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); + + if (ImmR != 0 && ImmS >= ImmR) { + return Error(Operands[4]->getStartLoc(), + "requested insert overflows register"); + } + return false; + } + case AArch64::BFXILwwii: + case AArch64::BFXILxxii: + case AArch64::SBFXwwii: + case AArch64::SBFXxxii: + case AArch64::UBFXwwii: + case AArch64::UBFXxxii: { + unsigned ImmOps = Inst.getNumOperands() - 2; + int64_t ImmR = Inst.getOperand(ImmOps).getImm(); + int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); + int64_t RegWidth = 0; + switch (Inst.getOpcode()) { + case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii: + RegWidth = 64; + break; + case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii: + RegWidth = 32; + break; + } + + if (ImmS >= RegWidth || ImmS < ImmR) { + return Error(Operands[4]->getStartLoc(), + "requested extract overflows register"); + } + return false; + } + case AArch64::ICix: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal); + if (!A64IC::NeedsRegister(ICOp)) { + return Error(Operands[1]->getStartLoc(), + "specified IC op does not use a register"); + } + return false; + } + case AArch64::ICi: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal); + if (A64IC::NeedsRegister(ICOp)) { + return Error(Operands[1]->getStartLoc(), + "specified IC op requires a register"); + } + return false; + } + case AArch64::TLBIix: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal); + if (!A64TLBI::NeedsRegister(TLBIOp)) { + return Error(Operands[1]->getStartLoc(), + "specified TLBI op does not use a register"); + } + return false; + } + case AArch64::TLBIi: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal); + if (A64TLBI::NeedsRegister(TLBIOp)) { + return Error(Operands[1]->getStartLoc(), + "specified TLBI op requires a register"); + } + return false; + } + } + + return false; +} + + +// Parses the instruction *together with* all operands, appending each parsed +// operand to the "Operands" list +bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + size_t CondCodePos = Name.find('.'); + + StringRef Mnemonic = Name.substr(0, CondCodePos); + Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc)); + + if (CondCodePos != StringRef::npos) { + // We have a condition code + SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1); + StringRef CondStr = Name.substr(CondCodePos + 1, StringRef::npos); + A64CC::CondCodes Code; + + Code = A64StringToCondCode(CondStr); + + if (Code == A64CC::Invalid) { + Error(S, "invalid condition code"); + Parser.eatToEndOfStatement(); + return true; + } + + SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos); + + Operands.push_back(AArch64Operand::CreateToken(".", DotL)); + SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + 
CondCodePos + 3); + Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E)); + } + + // Now we parse the operands of this instruction + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (ParseOperand(Operands, Mnemonic)) { + Parser.eatToEndOfStatement(); + return true; + } + + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + + // Parse and remember the operand. + if (ParseOperand(Operands, Mnemonic)) { + Parser.eatToEndOfStatement(); + return true; + } + + + // After successfully parsing some operands there are two special cases to + // consider (i.e. notional operands not separated by commas). Both are due + // to memory specifiers: + // + An RBrac will end an address for load/store/prefetch + // + An '!' will indicate a pre-indexed operation. + // + // It's someone else's responsibility to make sure these tokens are sane + // in the given context! + if (Parser.getTok().is(AsmToken::RBrac)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("]", Loc)); + Parser.Lex(); + } + + if (Parser.getTok().is(AsmToken::Exclaim)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("!", Loc)); + Parser.Lex(); + } + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.eatToEndOfStatement(); + return Error(Loc, "expected comma before next operand"); + } + + // Eat the EndOfStatement + Parser.Lex(); + + return false; +} + +bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".hword") + return ParseDirectiveWord(2, DirectiveID.getLoc()); + else if (IDVal == ".word") + return ParseDirectiveWord(4, DirectiveID.getLoc()); + else if (IDVal == ".xword") + return ParseDirectiveWord(8, DirectiveID.getLoc()); + else if (IDVal == ".tlsdesccall") + return ParseDirectiveTLSDescCall(DirectiveID.getLoc()); + + return true; +} + +/// parseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().parseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. 
+ if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + } + } + + Parser.Lex(); + return false; +} + +// parseDirectiveTLSDescCall: +// ::= .tlsdesccall symbol +bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) { + StringRef Name; + if (getParser().parseIdentifier(Name)) + return Error(L, "expected symbol after directive"); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); + + MCInst Inst; + Inst.setOpcode(AArch64::TLSDESCCALL); + Inst.addOperand(MCOperand::CreateExpr(Expr)); + + getParser().getStreamer().EmitInstruction(Inst); + return false; +} + + +bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult; + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); + + if (ErrorInfo != ~0U && ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + switch (MatchResult) { + default: break; + case Match_Success: + if (validateInstruction(Inst, Operands)) + return true; + + Out.EmitInstruction(Inst); + return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + + case Match_AddSubRegExtendSmall: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]"); + case Match_AddSubRegExtendLarge: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]"); + case Match_AddSubRegShift32: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]"); + case Match_AddSubRegShift64: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]"); + case Match_AddSubSecondSource: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected compatible register, symbol or integer in range [0, 4095]"); + case Match_CVTFixedPos32: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 32]"); + case Match_CVTFixedPos64: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 64]"); + case Match_CondCode: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected AArch64 condition code"); + case Match_FPImm: + // Any situation which allows a nontrivial floating-point constant also + // allows a register. 
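+    // (e.g. an fmov can typically take either "#1.0" or another FP register,
+    // hence the combined wording of the diagnostic below.)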
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected compatible register or floating-point constant"); + case Match_FPZero: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected floating-point constant #0.0"); + case Match_Label: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected label or encodable integer pc offset"); + case Match_Lane1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected lane specifier '[1]'"); + case Match_LoadStoreExtend32_1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0"); + case Match_LoadStoreExtend32_2: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1"); + case Match_LoadStoreExtend32_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2"); + case Match_LoadStoreExtend32_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3"); + case Match_LoadStoreExtend32_16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtw' with optional shift of #0 or #4"); + case Match_LoadStoreExtend64_1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0"); + case Match_LoadStoreExtend64_2: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #1"); + case Match_LoadStoreExtend64_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #2"); + case Match_LoadStoreExtend64_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #3"); + case Match_LoadStoreExtend64_16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #4"); + case Match_LoadStoreSImm7_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer multiple of 4 in range [-256, 252]"); + case Match_LoadStoreSImm7_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer multiple of 8 in range [-512, 508]"); + case Match_LoadStoreSImm7_16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer multiple of 16 in range [-1024, 1016]"); + case Match_LoadStoreSImm9: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [-256, 255]"); + case Match_LoadStoreUImm12_1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 4095]"); + case Match_LoadStoreUImm12_2: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 8190]"); + case Match_LoadStoreUImm12_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 16380]"); + case Match_LoadStoreUImm12_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 32760]"); + case Match_LoadStoreUImm12_16: + return 
Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 65520]"); + case Match_LogicalSecondSource: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected compatible register or logical immediate"); + case Match_MOVWUImm16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected relocated symbol or integer in range [0, 65535]"); + case Match_MRS: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected readable system register"); + case Match_MSR: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected writable system register or pstate"); + case Match_NamedImm_at: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]"); + case Match_NamedImm_dbarrier: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 15] or symbolic barrier operand"); + case Match_NamedImm_dc: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic 'dc' operand"); + case Match_NamedImm_ic: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'"); + case Match_NamedImm_isb: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 15] or 'sy'"); + case Match_NamedImm_prefetch: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected prefetch hint: p(ld|st|i)l[123](strm|keep)"); + case Match_NamedImm_tlbi: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected translation buffer invalidation operand"); + case Match_UImm16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 65535]"); + case Match_UImm3: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 7]"); + case Match_UImm4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 15]"); + case Match_UImm5: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 31]"); + case Match_UImm6: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 63]"); + case Match_UImm7: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 127]"); + case Match_Width32: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [<lsb>, 31]"); + case Match_Width64: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [<lsb>, 63]"); + } + + llvm_unreachable("Implement any new match types added!"); + return true; +} + +void AArch64Operand::print(raw_ostream &OS) const { + switch (Kind) { + case k_CondCode: + OS << "<CondCode: " << CondCode.Code << ">"; + break; + case k_FPImmediate: + OS << "<fpimm: " << FPImm.Val << ">"; + break; + case k_ImmWithLSL: + OS << "<immwithlsl: imm=" << ImmWithLSL.Val + << ", shift=" << ImmWithLSL.ShiftAmount << ">"; + break; + case k_Immediate: + getImm()->print(OS); + break; + case k_Register: + OS << "<register " << getReg() << '>'; + break; + case k_Token: + OS << '\'' << getToken() << '\''; + break; + case k_ShiftExtend: + OS << "<shift: type=" << ShiftExtend.ShiftType + << ", amount=" << 
ShiftExtend.Amount << ">"; + break; + case k_SysReg: { + StringRef Name(SysReg.Data, SysReg.Length); + OS << "<sysreg: " << Name << '>'; + break; + } + default: + llvm_unreachable("No idea how to print this kind of operand"); + break; + } +} + +void AArch64Operand::dump() const { + print(errs()); +} + + +/// Force static initialization. +extern "C" void LLVMInitializeAArch64AsmParser() { + RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64Target); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "AArch64GenAsmMatcher.inc" diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp new file mode 100644 index 0000000..12c1b8f --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -0,0 +1,803 @@ +//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the functions necessary to decode AArch64 instruction +// bitpatterns into MCInsts (with the help of TableGenerated information from +// the instruction definitions). +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-disassembler" + +#include "AArch64.h" +#include "AArch64RegisterInfo.h" +#include "AArch64Subtarget.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +/// AArch64 disassembler for all AArch64 platforms. +class AArch64Disassembler : public MCDisassembler { + const MCRegisterInfo *RegInfo; +public: + /// Initializes the disassembler. + /// + AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) + : MCDisassembler(STI), RegInfo(Info) { + } + + ~AArch64Disassembler() { + } + + /// See MCDisassembler. + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + const MCRegisterInfo *getRegInfo() const { return RegInfo; } +}; + +} + +// Forward-declarations used in the auto-generated files. 
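+// Each callback is handed the raw field value extracted by the generated
+// decoder tables, appends the corresponding MCOperand(s) to the MCInst and
+// returns Success, SoftFail for encodings that are merely unpredictable, or
+// Fail; the Check() helper further down folds these statuses together.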
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus +DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus +DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, + unsigned OptionHiS, + uint64_t Address, + const void *Decoder); + + +static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, + unsigned RmBits, + uint64_t Address, + const void *Decoder); + +template<int RegWidth> +static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, + unsigned FullImm, + uint64_t Address, + const void *Decoder); + +template<int RegWidth> +static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, + unsigned Bits, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder); + +static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +template<typename SomeNamedImmMapper> +static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus +DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper, + llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + + +static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, + unsigned Val, + 
uint64_t Address, + const void *Decoder); + + +static bool Check(DecodeStatus &Out, DecodeStatus In); + +#include "AArch64GenDisassemblerTables.inc" +#include "AArch64GenInstrInfo.inc" + +static bool Check(DecodeStatus &Out, DecodeStatus In) { + switch (In) { + case MCDisassembler::Success: + // Out stays the same. + return true; + case MCDisassembler::SoftFail: + Out = In; + return true; + case MCDisassembler::Fail: + Out = In; + return false; + } + llvm_unreachable("Invalid DecodeStatus!"); +} + +DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + CommentStream = &cs; + + uint8_t bytes[4]; + + // We want to read exactly 4 bytes of data. + if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + // Encoded as a small-endian 32-bit word in the stream. + uint32_t insn = (bytes[3] << 24) | + (bytes[2] << 16) | + (bytes[1] << 8) | + (bytes[0] << 0); + + // Calling the auto-generated decoder function. + DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); + Size = 0; + return MCDisassembler::Fail; +} + +static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { + const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D); + return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo); +} + +static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + 
return MCDisassembler::Success; +} + + +static DecodeStatus +DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + + +static DecodeStatus +DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, + unsigned OptionHiS, + uint64_t Address, + const void *Decoder) { + // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1}, + // S}. Hence we want to check bit 1. + if (!(OptionHiS & 2)) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(OptionHiS)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder) { + // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be + // between 0 and 31. + if (Imm6Bits > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder) { + // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32. + if (Imm6Bits < 32) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, + unsigned RmBits, + uint64_t Address, + const void *Decoder) { + // Any bits are valid in the instruction (they're architecturally ignored), + // but a code generator should insert 0. 
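+  // (This is the "#0.0" operand of, presumably, the fcmp/fcmpe-against-zero
+  // forms, so a constant zero is simply re-created here whatever the ignored
+  // bits contained.)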
+ Inst.addOperand(MCOperand::CreateImm(0)); + return MCDisassembler::Success; +} + + + +template<int RegWidth> +static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, + unsigned FullImm, + uint64_t Address, + const void *Decoder) { + unsigned Imm16 = FullImm & 0xffff; + unsigned Shift = FullImm >> 16; + + if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm16)); + Inst.addOperand(MCOperand::CreateImm(Shift)); + return MCDisassembler::Success; +} + +template<int RegWidth> +static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, + unsigned Bits, + uint64_t Address, + const void *Decoder) { + uint64_t Imm; + if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm)) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Bits)); + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder) { + // Only values 0-4 are valid for this 3-bit field + if (ShiftAmount > 4) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); + return MCDisassembler::Success; +} + +static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder) { + // Only values below 32 are valid for a 32-bit register + if (ShiftAmount > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned ImmS = fieldFromInstruction(Insn, 10, 6); + unsigned ImmR = fieldFromInstruction(Insn, 16, 6); + unsigned SF = fieldFromInstruction(Insn, 31, 1); + + // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise + // out assertions that it thinks should never be hit. + enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc; + Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2); + + if (!SF) { + // ImmR and ImmS must be between 0 and 31 for 32-bit instructions. + if (ImmR > 31 || ImmS > 31) + return MCDisassembler::Fail; + } + + if (SF) { + DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + // BFM MCInsts use Rd as a source too. + if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); + } else { + DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); + // BFM MCInsts use Rd as a source too. + if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder); + } + + // ASR and LSR have more specific patterns so they won't get here: + assert(!(ImmS == 31 && !SF && Opc != BFM) + && "shift should have used auto decode"); + assert(!(ImmS == 63 && SF && Opc != BFM) + && "shift should have used auto decode"); + + // Extension instructions similarly: + if (Opc == SBFM && ImmR == 0) { + assert((ImmS != 7 && ImmS != 15) && "extension got here"); + assert((ImmS != 31 || SF == 0) && "extension got here"); + } else if (Opc == UBFM && ImmR == 0) { + assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here"); + } + + if (Opc == UBFM) { + // It might be a LSL instruction, which actually takes the shift amount + // itself as an MCInst operand. 
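+    // For example "lsl x0, x1, #3" is encoded as UBFM with ImmR == 61 and
+    // ImmS == 60, so (ImmS + 1) % 64 == ImmR holds and the instruction is
+    // rewritten as LSLxxi with shift amount 63 - ImmS == 3.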
+ if (SF && (ImmS + 1) % 64 == ImmR) { + Inst.setOpcode(AArch64::LSLxxi); + Inst.addOperand(MCOperand::CreateImm(63 - ImmS)); + return MCDisassembler::Success; + } else if (!SF && (ImmS + 1) % 32 == ImmR) { + Inst.setOpcode(AArch64::LSLwwi); + Inst.addOperand(MCOperand::CreateImm(31 - ImmS)); + return MCDisassembler::Success; + } + } + + // Otherwise it's definitely either an extract or an insert depending on which + // of ImmR or ImmS is larger. + unsigned ExtractOp, InsertOp; + switch (Opc) { + default: llvm_unreachable("unexpected instruction trying to decode bitfield"); + case SBFM: + ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii; + InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii; + break; + case BFM: + ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii; + InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii; + break; + case UBFM: + ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii; + InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii; + break; + } + + // Otherwise it's a boring insert or extract + Inst.addOperand(MCOperand::CreateImm(ImmR)); + Inst.addOperand(MCOperand::CreateImm(ImmS)); + + + if (ImmS < ImmR) + Inst.setOpcode(InsertOp); + else + Inst.setOpcode(ExtractOp); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + // This decoder exists to add the dummy Lane operand to the MCInst, which must + // be 1 in assembly but has no other real manifestation. + unsigned Rd = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned IsToVec = fieldFromInstruction(Insn, 16, 1); + + if (IsToVec) { + DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); + } else { + DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder); + } + + // Add the lane + Inst.addOperand(MCOperand::CreateImm(1)); + + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + DecodeStatus Result = MCDisassembler::Success; + unsigned Rt = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned Rt2 = fieldFromInstruction(Insn, 10, 5); + unsigned SImm7 = fieldFromInstruction(Insn, 15, 7); + unsigned L = fieldFromInstruction(Insn, 22, 1); + unsigned V = fieldFromInstruction(Insn, 26, 1); + unsigned Opc = fieldFromInstruction(Insn, 30, 2); + + // Not an official name, but it turns out that bit 23 distinguishes indexed + // from non-indexed operations. + unsigned Indexed = fieldFromInstruction(Insn, 23, 1); + + if (Indexed && L == 0) { + // The MCInst for an indexed store has an out operand and 4 ins: + // Rn_wb, Rt, Rt2, Rn, Imm + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + } + + // You shouldn't load to the same register twice in an instruction... + if (L && Rt == Rt2) + Result = MCDisassembler::SoftFail; + + // ... or do any operation that writes-back to a transfer register. But note + // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different. + if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn)) + Result = MCDisassembler::SoftFail; + + // Exactly how we decode the MCInst's registers depends on the Opc and V + // fields of the instruction. 
These also obviously determine the size of the + // operation so we can fill in that information while we're at it. + if (V) { + // The instruction operates on the FP/SIMD registers + switch (Opc) { + default: return MCDisassembler::Fail; + case 0: + DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 1: + DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 2: + DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder); + break; + } + } else { + switch (Opc) { + default: return MCDisassembler::Fail; + case 0: + DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 1: + assert(L && "unexpected \"store signed\" attempt"); + DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 2: + DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + } + } + + if (Indexed && L == 1) { + // The MCInst for an indexed load has 3 out operands and an 3 ins: + // Rt, Rt2, Rn_wb, Rt2, Rn, Imm + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + } + + + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(SImm7)); + + return Result; +} + +static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, + uint32_t Val, + uint64_t Address, + const void *Decoder) { + unsigned Rt = fieldFromInstruction(Val, 0, 5); + unsigned Rn = fieldFromInstruction(Val, 5, 5); + unsigned Rt2 = fieldFromInstruction(Val, 10, 5); + unsigned MemSize = fieldFromInstruction(Val, 30, 2); + + DecodeStatus S = MCDisassembler::Success; + if (Rt == Rt2) S = MCDisassembler::SoftFail; + + switch (MemSize) { + case 2: + if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + break; + case 3: + if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction"); + } + + if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +template<typename SomeNamedImmMapper> +static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + SomeNamedImmMapper Mapper; + bool ValidNamed; + Mapper.toString(Val, ValidNamed); + if (ValidNamed || Mapper.validImm(Val)) { + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + bool ValidNamed; + Mapper.toString(Val, ValidNamed); + + Inst.addOperand(MCOperand::CreateImm(Val)); + + return ValidNamed ? 
MCDisassembler::Success : MCDisassembler::Fail; +} + +static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address, + Decoder); +} + +static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address, + Decoder); +} + +static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned Rt = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned Imm9 = fieldFromInstruction(Insn, 12, 9); + + unsigned Opc = fieldFromInstruction(Insn, 22, 2); + unsigned V = fieldFromInstruction(Insn, 26, 1); + unsigned Size = fieldFromInstruction(Insn, 30, 2); + + if (Opc == 0 || (V == 1 && Opc == 2)) { + // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + } + + if (V == 0 && (Opc == 2 || Size == 3)) { + DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); + } else if (V == 0) { + DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); + } else if (V == 1 && (Opc & 2)) { + DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); + } else { + switch (Size) { + case 0: + DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder); + break; + case 1: + DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder); + break; + case 2: + DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); + break; + case 3: + DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); + break; + } + } + + if (Opc != 0 && (V != 1 || Opc != 2)) { + // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + } + + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + + Inst.addOperand(MCOperand::CreateImm(Imm9)); + + // N.b. The official documentation says undpredictable if Rt == Rn, but this + // takes place at the architectural rather than encoding level: + // + // "STR xzr, [sp], #4" is perfectly valid. + if (V == 0 && Rt == Rn && Rn != 31) + return MCDisassembler::SoftFail; + else + return MCDisassembler::Success; +} + +static MCDisassembler *createAArch64Disassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new AArch64Disassembler(STI, T.createMCRegInfo("")); +} + +extern "C" void LLVMInitializeAArch64Disassembler() { + TargetRegistry::RegisterMCDisassembler(TheAArch64Target, + createAArch64Disassembler); +} + + diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp new file mode 100644 index 0000000..82ce80c --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -0,0 +1,408 @@ +//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an AArch64 MCInst to a .s file. 
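Referring back to DecodeSingleIndexedInstruction above, a hedged sketch (not part of the imported source) of the writeback-hazard rule it applies; the helper name and the mnemonics in the comments are illustrative only:

// Sketch only: pre/post-indexed GPR transfers that write back into their own
// base register are architecturally unpredictable, unless register 31 is
// involved (it means xzr as a transfer register and sp as a base).
//   "ldr x0, [x0], #8"   -> Rt == Rn == 0  -> flagged (SoftFail)
//   "str xzr, [sp], #16" -> Rt == Rn == 31 -> fine
static bool isUnpredictableWriteback(unsigned Rt, unsigned Rn, bool IsGPR) {
  return IsGPR && Rt == Rn && Rn != 31;
}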
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "AArch64GenAsmWriter.inc" + +static int64_t unpackSignedImm(int BitWidth, uint64_t Value) { + assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit"); + if (Value & (1ULL << (BitWidth - 1))) + return static_cast<int64_t>(Value) - (1LL << BitWidth); + else + return Value; +} + +AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) : + MCInstPrinter(MAI, MII, MRI) { + // Initialize the set of available features. + setAvailableFeatures(STI.getFeatureBits()); +} + +void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << getRegisterName(RegNo); +} + +void +AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O) { + const MCOperand &MOImm = MI->getOperand(OpNum); + int32_t Imm = unpackSignedImm(9, MOImm.getImm()); + + O << '#' << Imm; +} + +void +AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, unsigned MemSize, + unsigned RmSize) { + unsigned ExtImm = MI->getOperand(OpNum).getImm(); + unsigned OptionHi = ExtImm >> 1; + unsigned S = ExtImm & 1; + bool IsLSL = OptionHi == 1 && RmSize == 64; + + const char *Ext; + switch (OptionHi) { + case 1: + Ext = (RmSize == 32) ? "uxtw" : "lsl"; + break; + case 3: + Ext = (RmSize == 32) ? "sxtw" : "sxtx"; + break; + default: + llvm_unreachable("Incorrect Option on load/store (reg offset)"); + } + O << Ext; + + if (S) { + unsigned ShiftAmt = Log2_32(MemSize); + O << " #" << ShiftAmt; + } else if (IsLSL) { + O << " #0"; + } +} + +void +AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O) { + const MCOperand &Imm12Op = MI->getOperand(OpNum); + + if (Imm12Op.isImm()) { + int64_t Imm12 = Imm12Op.getImm(); + assert(Imm12 >= 0 && "Invalid immediate for add/sub imm"); + O << "#" << Imm12; + } else { + assert(Imm12Op.isExpr() && "Unexpected shift operand type"); + O << "#" << *Imm12Op.getExpr(); + } +} + +void +AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + + printAddSubImmLSL0Operand(MI, OpNum, O); + + O << ", lsl #12"; +} + +void +AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + O << MO.getImm(); +} + +template<unsigned RegWidth> void +AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmROp = MI->getOperand(OpNum); + unsigned LSB = ImmROp.getImm() == 0 ? 
0 : RegWidth - ImmROp.getImm(); + + O << '#' << LSB; +} + +void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmSOp = MI->getOperand(OpNum); + unsigned Width = ImmSOp.getImm() + 1; + + O << '#' << Width; +} + +void +AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmSOp = MI->getOperand(OpNum); + const MCOperand &ImmROp = MI->getOperand(OpNum - 1); + + unsigned ImmR = ImmROp.getImm(); + unsigned ImmS = ImmSOp.getImm(); + + assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract"); + + O << '#' << (ImmS - ImmR + 1); +} + +void +AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &CRx = MI->getOperand(OpNum); + + O << 'c' << CRx.getImm(); +} + + +void +AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ScaleOp = MI->getOperand(OpNum); + + O << '#' << (64 - ScaleOp.getImm()); +} + + +void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &o) { + const MCOperand &MOImm8 = MI->getOperand(OpNum); + + assert(MOImm8.isImm() + && "Immediate operand required for floating-point immediate inst"); + + uint32_t Imm8 = MOImm8.getImm(); + uint32_t Fraction = Imm8 & 0xf; + uint32_t Exponent = (Imm8 >> 4) & 0x7; + uint32_t Negative = (Imm8 >> 7) & 0x1; + + float Val = 1.0f + Fraction / 16.0f; + + // That is: + // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4, + // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0 + if (Exponent & 0x4) { + Val /= 1 << (7 - Exponent); + } else { + Val *= 1 << (Exponent + 1); + } + + Val = Negative ? -Val : Val; + + o << '#' << format("%.8f", Val); +} + +void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &o) { + o << "#0.0"; +} + +void +AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm())); +} + +template <unsigned field_width, unsigned scale> void +AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + if (!MO.isImm()) { + printOperand(MI, OpNum, O); + return; + } + + // The immediate of LDR (lit) instructions is a signed 19-bit immediate, which + // is multiplied by 4 (because all A64 instructions are 32-bits wide). 
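printFPImmOperand above inverts the A64 8-bit floating-point immediate encoding (sign bit, 3-bit exponent field, 4-bit fraction). A standalone sketch of the same arithmetic, for illustration only:

// Sketch only: mirrors the decode in printFPImmOperand.
//   0x70 -> +1.0    (fraction 0, exponent field 7 -> 2^0)
//   0x00 -> +2.0    (fraction 0, exponent field 0 -> 2^1)
//   0xC8 -> -0.1875 (fraction 8 -> 1.5, exponent field 4 -> 2^-3, negative)
static float decodeFP8Imm(unsigned Imm8) {
  unsigned Fraction = Imm8 & 0xf;
  unsigned Exponent = (Imm8 >> 4) & 0x7;
  bool Negative = (Imm8 >> 7) & 0x1;
  float Val = 1.0f + Fraction / 16.0f;
  if (Exponent & 0x4)
    Val /= 1 << (7 - Exponent); // fields 4..7 give scales 2^-3 .. 2^0
  else
    Val *= 1 << (Exponent + 1); // fields 0..3 give scales 2^1 .. 2^4
  return Negative ? -Val : Val;
}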
+ uint64_t UImm = MO.getImm(); + uint64_t Sign = UImm & (1LL << (field_width - 1)); + int64_t SImm = scale * ((UImm & ~Sign) - Sign); + + O << "#" << SImm; +} + +template<unsigned RegWidth> void +AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + uint64_t Val; + A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val); + O << "#0x"; + O.write_hex(Val); +} + +void +AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, int MemSize) { + const MCOperand &MOImm = MI->getOperand(OpNum); + + if (MOImm.isImm()) { + uint32_t Imm = MOImm.getImm() * MemSize; + + O << "#" << Imm; + } else { + O << "#" << *MOImm.getExpr(); + } +} + +void +AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, + A64SE::ShiftExtSpecifiers Shift) { + const MCOperand &MO = MI->getOperand(OpNum); + + // LSL #0 is not printed + if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0) + return; + + switch (Shift) { + case A64SE::LSL: O << "lsl"; break; + case A64SE::LSR: O << "lsr"; break; + case A64SE::ASR: O << "asr"; break; + case A64SE::ROR: O << "ror"; break; + default: llvm_unreachable("Invalid shift specifier in logical instruction"); + } + + O << " #" << MO.getImm(); +} + +void +AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &UImm16MO = MI->getOperand(OpNum); + const MCOperand &ShiftMO = MI->getOperand(OpNum + 1); + + if (UImm16MO.isImm()) { + O << '#' << UImm16MO.getImm(); + + if (ShiftMO.getImm() != 0) + O << ", lsl #" << (ShiftMO.getImm() * 16); + + return; + } + + O << "#" << *UImm16MO.getExpr(); +} + +void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + bool ValidName; + const MCOperand &MO = MI->getOperand(OpNum); + StringRef Name = Mapper.toString(MO.getImm(), ValidName); + + if (ValidName) + O << Name; + else + O << '#' << MO.getImm(); +} + +void +AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + bool ValidName; + std::string Name = Mapper.toString(MO.getImm(), ValidName); + if (ValidName) { + O << Name; + return; + } +} + + +void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O, + A64SE::ShiftExtSpecifiers Ext) { + // FIXME: In principle TableGen should be able to detect this itself far more + // easily. We will only accumulate more of these hacks. 
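The branchless sign-extension used by printLabelOperand above is worth a concrete trace; a hedged sketch, with a hypothetical helper name:

// Sketch only: sign-extend a FieldWidth-bit field, then scale it.
// For the 19-bit LDR-literal field with scale 4:
//   UImm = 0x7ffff (all ones) -> Sign = 0x40000 -> (0x3ffff - 0x40000) = -1 -> #-4
//   UImm = 0x00001            -> Sign = 0       -> 1                        -> #4
static int64_t signExtendAndScale(uint64_t UImm, unsigned FieldWidth,
                                  unsigned Scale) {
  uint64_t Sign = UImm & (1ULL << (FieldWidth - 1));
  return (int64_t)Scale * (int64_t)((UImm & ~Sign) - Sign);
}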
+ unsigned Reg0 = MI->getOperand(0).getReg(); + unsigned Reg1 = MI->getOperand(1).getReg(); + + if (isStackReg(Reg0) || isStackReg(Reg1)) { + A64SE::ShiftExtSpecifiers LSLEquiv; + + if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP) + LSLEquiv = A64SE::UXTX; + else + LSLEquiv = A64SE::UXTW; + + if (Ext == LSLEquiv) { + O << "lsl #" << MI->getOperand(OpNum).getImm(); + return; + } + } + + switch (Ext) { + case A64SE::UXTB: O << "uxtb"; break; + case A64SE::UXTH: O << "uxth"; break; + case A64SE::UXTW: O << "uxtw"; break; + case A64SE::UXTX: O << "uxtx"; break; + case A64SE::SXTB: O << "sxtb"; break; + case A64SE::SXTH: O << "sxth"; break; + case A64SE::SXTW: O << "sxtw"; break; + case A64SE::SXTX: O << "sxtx"; break; + default: llvm_unreachable("Unexpected shift type for printing"); + } + + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.getImm() != 0) + O << " #" << MO.getImm(); +} + +template<int MemScale> void +AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MOImm = MI->getOperand(OpNum); + int32_t Imm = unpackSignedImm(7, MOImm.getImm()); + + O << "#" << (Imm * MemScale); +} + +void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg); + } else if (Op.isImm()) { + O << '#' << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + // If a symbolic branch target was added as a constant expression then print + // that address in hex. + const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); + int64_t Address; + if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + O << "0x"; + O.write_hex(Address); + } + else { + // Otherwise, just print the expression. + O << *Op.getExpr(); + } + } +} + + +void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + if (MI->getOpcode() == AArch64::TLSDESCCALL) { + // This is a special assembler directive which applies an + // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed + // form outside the normal TableGenerated scheme. + O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr(); + } else if (!printAliasInstr(MI, O)) + printInstruction(MI, O); + + printAnnotation(O, Annot); +} diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h new file mode 100644 index 0000000..639fa86 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -0,0 +1,172 @@ +//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an AArch64 MCInst to a .s file. 
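printSImm7ScaledOperand above combines unpackSignedImm with the per-instruction memory scale; a small worked sketch, for illustration only:

// Sketch only: how the 7-bit scaled offset of an LDP/STP is printed.
// For an X-register pair the scale is 8:
//   field 0x7f -> -1 -> "#-8",   field 0x01 -> +1 -> "#8"
static int32_t ldstPairOffset(uint64_t Field7, int MemScale) {
  int64_t Imm = (Field7 & 0x40) ? (int64_t)Field7 - 128 : (int64_t)Field7;
  return (int32_t)(Imm * MemScale);
}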
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64INSTPRINTER_H +#define LLVM_AARCH64INSTPRINTER_H + +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { + +class MCOperand; + +class AArch64InstPrinter : public MCInstPrinter { +public: + AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + + // Autogenerated by tblgen + void printInstruction(const MCInst *MI, raw_ostream &O); + bool printAliasInstr(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); + + void printRegName(raw_ostream &O, unsigned RegNum) const; + + template<unsigned MemSize, unsigned RmSize> + void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize); + } + + + void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, unsigned MemSize, + unsigned RmSize); + + void printAddSubImmLSL0Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O); + void printAddSubImmLSL12Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O); + + void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template<unsigned RegWidth> + void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + + void printCondCodeOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printCRxOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); + + void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); + + template<int MemScale> + void printOffsetUImm12Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &o) { + printOffsetUImm12Operand(MI, OpNum, o, MemScale); + } + + void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &o, int MemScale); + + template<unsigned field_width, unsigned scale> + void printLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + template<unsigned RegWidth> + void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template<typename SomeNamedImmMapper> + void printNamedImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O); + } + + void printNamedImmOperand(const NamedImmMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printMRSOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O); + } + + void printMSROperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O); + } + + void printShiftOperand(const char *name, const MCInst *MI, + unsigned OpIdx, raw_ostream &O); + + void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printLSROperand(const MCInst 
*MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("lsr", MI, OpNum, O); + } + void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("asr", MI, OpNum, O); + } + void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("ror", MI, OpNum, O); + } + + template<A64SE::ShiftExtSpecifiers Shift> + void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand(MI, OpNum, O, Shift); + } + + void printShiftOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, A64SE::ShiftExtSpecifiers Sh); + + + void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + template<int MemSize> void + printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template<A64SE::ShiftExtSpecifiers EXT> + void printRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printRegExtendOperand(MI, OpNum, O, EXT); + } + + void printRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, A64SE::ShiftExtSpecifiers Ext); + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + + bool isStackReg(unsigned RegNo) { + return RegNo == AArch64::XSP || RegNo == AArch64::WSP; + } + + +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp new file mode 100644 index 0000000..a3373b1 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -0,0 +1,585 @@ +//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the MCAsmBackend class, +// which is principally concerned with relaxation of the various fixup kinds. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class AArch64AsmBackend : public MCAsmBackend { + const MCSubtargetInfo* STI; +public: + AArch64AsmBackend(const Target &T, const StringRef TT) + : MCAsmBackend(), + STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", "")) + {} + + + ~AArch64AsmBackend() { + delete STI; + } + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; + + virtual void processFixupValue(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved); +}; +} // end anonymous namespace + +void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, + const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved) { + // The ADRP instruction adds some multiple of 0x1000 to the current PC & + // ~0xfff. This means that the required offset to reach a symbol can vary by + // up to one step depending on where the ADRP is in memory. For example: + // + // ADRP x0, there + // there: + // + // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and + // we'll need that as an offset. At any other address "there" will be in the + // same page as the ADRP and the instruction should encode 0x0. Assuming the + // section isn't 0x1000-aligned, we therefore need to delegate this decision + // to the linker -- a relocation! + if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page) + IsResolved = false; +} + + +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value); + +namespace { + +class ELFAArch64AsmBackend : public AArch64AsmBackend { +public: + uint8_t OSABI; + ELFAArch64AsmBackend(const Target &T, const StringRef TT, + uint8_t _OSABI) + : AArch64AsmBackend(T, TT), OSABI(_OSABI) { } + + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const; + + unsigned int getNumFixupKinds() const { + return AArch64::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { +// This table *must* be in the order that the fixup_* kinds are defined in +// AArch64FixupKinds.h. 
+// +// Name Offset (bits) Size (bits) Flags +{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_add_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst8_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst16_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst32_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst64_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst128_lo12", 0, 32, 0 }, +{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_movw_uabs_g0", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g1", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g2", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g3", 0, 32, 0 }, +{ "fixup_a64_movw_sabs_g0", 0, 32, 0 }, +{ "fixup_a64_movw_sabs_g1", 0, 32, 0 }, +{ "fixup_a64_movw_sabs_g2", 0, 32, 0 }, +{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 }, +{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 }, +{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 }, +{ "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 }, +{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_movw_tprel_g2", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g1", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g0", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 }, +{ "fixup_a64_add_tprel_hi12", 0, 32, 0 }, +{ "fixup_a64_add_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_tlsdesc_call", 0, 0, 0 } + }; + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const 
{ + unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8; + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the bits + // from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) { + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + } + } + + bool mayNeedRelaxation(const MCInst&) const { + return false; + } + + void relaxInstruction(const MCInst&, llvm::MCInst&) const { + llvm_unreachable("Cannot relax instructions"); + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createAArch64ELFObjectWriter(OS, OSABI); + } +}; + +} // end anonymous namespace + +bool +ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + // Correct for now. With all instructions 32-bit only very low-level + // considerations could make you select something which may fail. + return false; +} + + +bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { + // Can't emit NOP with size not multiple of 32-bits + if (Count % 4 != 0) + return false; + + uint64_t NumNops = Count / 4; + for (uint64_t i = 0; i != NumNops; ++i) + OW->Write32(0xd503201f); + + return true; +} + +static unsigned ADRImmBits(unsigned Value) { + unsigned lo2 = Value & 0x3; + unsigned hi19 = (Value & 0x1fffff) >> 2; + + return (hi19 << 5) | (lo2 << 29); +} + +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_2: + assert((int64_t)Value >= -32768 && + (int64_t)Value <= 65536 && + "Out of range ABS16 fixup"); + return Value; + case FK_Data_4: + assert((int64_t)Value >= -(1LL << 31) && + (int64_t)Value <= (1LL << 32) - 1 && + "Out of range ABS32 fixup"); + return Value; + case FK_Data_8: + return Value; + + case AArch64::fixup_a64_ld_gottprel_prel19: + // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F + // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20. + case AArch64::fixup_a64_ld_prel: + // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits + // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20. + assert((int64_t)Value >= -(1LL << 20) && + (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup"); + return (Value & 0x1ffffc) << 3; + + case AArch64::fixup_a64_adr_prel: + // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of + // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20. + assert((int64_t)Value >= -(1LL << 20) && + (int64_t)Value < (1LL << 20) && "Out of range ADR fixup"); + return ADRImmBits(Value & 0x1fffff); + + case AArch64::fixup_a64_adr_prel_page: + // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF + // F000 of the result of the operation, checking that -2^32 <= result < + // 2^32. + assert((int64_t)Value >= -(1LL << 32) && + (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); + return ADRImmBits((Value & 0x1fffff000ULL) >> 12); + + case AArch64::fixup_a64_add_dtprel_hi12: + // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits + // FF F000 of DTPREL(S+A), check 0 <= X < 2^24. 
+ case AArch64::fixup_a64_add_tprel_hi12:
+ // R_AARCH64_TLSLE_ADD_TPREL_HI12: Set an ADD immediate field to bits
+ // FF F000 of TPREL(S+A), check 0 <= X < 2^24.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 24) && "Out of range ADD fixup");
+ return (Value & 0xfff000) >> 2;
+
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_add_tprel_lo12:
+ // R_AARCH64_TLSLE_ADD_TPREL_LO12: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 12) && "Out of range ADD fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+ // R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits
+ // FFF of G(TLSDESC(S+A)), with no overflow check.
+ case AArch64::fixup_a64_add_lo12:
+ // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of
+ // S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC: Set an LD/ST offset field to bits FFF
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+ // R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: Set an LD/ST offset field to bits FFF
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_lo12:
+ // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF
+ // of S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC: Set an LD/ST offset field to bits FFE
+ // of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+ // R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: Set an LD/ST offset field to bits FFE
+ // of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_lo12:
+ // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE
+ // of S+A, with no overflow check.
+ return (Value & 0xffe) << 9; + + case AArch64::fixup_a64_ldst32_dtprel_lo12: + // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC + // of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_ldst32_tprel_lo12: + // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC + // of DTPREL(S+A), check 0 <= X < 2^12. + assert((int64_t) Value >= 0 && + (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: + // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC + // of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst32_tprel_lo12_nc: + // R_AARCH64_TLSLD_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC + // of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst32_lo12: + // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC + // of S+A, with no overflow check. + return (Value & 0xffc) << 8; + + case AArch64::fixup_a64_ldst64_dtprel_lo12: + // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 + // of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_ldst64_tprel_lo12: + // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 + // of DTPREL(S+A), check 0 <= X < 2^12. + assert((int64_t) Value >= 0 && + (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: + // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 + // of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst64_tprel_lo12_nc: + // R_AARCH64_TLSLD_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 + // of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst64_lo12: + // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8 + // of S+A, with no overflow check. + return (Value & 0xff8) << 7; + + case AArch64::fixup_a64_ldst128_lo12: + // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0 + // of S+A, with no overflow check. + return (Value & 0xff0) << 6; + + case AArch64::fixup_a64_movw_uabs_g0: + // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A + // with a check that S+A < 2^16 + assert(Value <= 0xffff && "Out of range move wide fixup"); + return (Value & 0xffff) << 5; + + case AArch64::fixup_a64_movw_dtprel_g0_nc: + // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits + // FFFF of DTPREL(S+A) with no overflow check. + case AArch64::fixup_a64_movw_gottprel_g0_nc: + // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits + // FFFF of G(TPREL(S+A)) - GOT with no overflow check. + case AArch64::fixup_a64_movw_tprel_g0_nc: + // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits + // FFFF of TPREL(S+A) with no overflow check. + case AArch64::fixup_a64_movw_uabs_g0_nc: + // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of + // S+A with no overflow check. 
+ return (Value & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g1: + // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of + // S+A with a check that S+A < 2^32 + assert(Value <= 0xffffffffull && "Out of range move wide fixup"); + return ((Value >> 16) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_dtprel_g1_nc: + // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field + // to bits FFFF0000 of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_movw_tprel_g1_nc: + // R_AARCH64_TLSLD_MOVW_TPREL_G1_NC: Set a MOVK immediate field + // to bits FFFF0000 of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_movw_uabs_g1_nc: + // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits + // FFFF0000 of S+A with no overflow check. + return ((Value >> 16) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g2: + // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000 + // 0000 of S+A with a check that S+A < 2^48 + assert(Value <= 0xffffffffffffull && "Out of range move wide fixup"); + return ((Value >> 32) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g2_nc: + // R_AARCH64_MOVW_UABS_G2: Sets a MOVK immediate field to bits FFFF 0000 + // 0000 of S+A with no overflow check. + return ((Value >> 32) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g3: + // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000 + // 0000 0000 of S+A (no overflow check needed) + return ((Value >> 48) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_dtprel_g0: + // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field + // to bits FFFF of DTPREL(S+A). + case AArch64::fixup_a64_movw_tprel_g0: + // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to + // bits FFFF of TPREL(S+A). + case AArch64::fixup_a64_movw_sabs_g0: { + // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of + // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we + // should convert between MOVN and MOVZ to achieve our goals). + int64_t Signed = Value; + assert(Signed >= -(1LL << 16) && Signed < (1LL << 16) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = (Value & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + // MCCodeEmitter should have encoded a MOVN, which is fine. + Value = (~Value & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_movw_dtprel_g1: + // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field + // to bits FFFF0000 of DTPREL(S+A). + case AArch64::fixup_a64_movw_gottprel_g1: + // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field + // to bits FFFF0000 of G(TPREL(S+A)) - GOT. + case AArch64::fixup_a64_movw_tprel_g1: + // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to + // bits FFFF0000 of TPREL(S+A). + case AArch64::fixup_a64_movw_sabs_g1: { + // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000 + // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we + // should convert between MOVN and MOVZ to achieve our goals). 
+ int64_t Signed = Value; + assert(Signed >= -(1LL << 32) && Signed < (1LL << 32) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = ((Value >> 16) & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + Value = ((~Value >> 16) & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_movw_dtprel_g2: + // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field + // to bits FFFF 0000 0000 of DTPREL(S+A). + case AArch64::fixup_a64_movw_tprel_g2: + // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to + // bits FFFF 0000 0000 of TPREL(S+A). + case AArch64::fixup_a64_movw_sabs_g2: { + // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000 + // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that + // we should convert between MOVN and MOVZ to achieve our goals). + int64_t Signed = Value; + assert(Signed >= -(1LL << 48) && Signed < (1LL << 48) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = ((Value >> 32) & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + Value = ((~Value >> 32) & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_tstbr: + // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to + // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15. + assert((int64_t)Value >= -(1LL << 15) && + (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup"); + return (Value & 0xfffc) << (5 - 2); + + case AArch64::fixup_a64_condbr: + // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch + // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20. + assert((int64_t)Value >= -(1LL << 20) && + (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup"); + return (Value & 0x1ffffc) << (5 - 2); + + case AArch64::fixup_a64_uncondbr: + // R_AARCH64_JUMP26 same as below (except to a linker, possibly). + case AArch64::fixup_a64_call: + // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P, + // checking that -2^27 <= S+A-P < 2^27. + assert((int64_t)Value >= -(1LL << 27) && + (int64_t)Value < (1LL << 27) && "Out of range branch fixup"); + return (Value & 0xffffffc) >> 2; + + case AArch64::fixup_a64_adr_gottprel_page: + // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits + // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32. + case AArch64::fixup_a64_tlsdesc_adr_page: + // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000 + // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32. + case AArch64::fixup_a64_adr_prel_got_page: + // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits + // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) < + // 2^32. + assert((int64_t)Value >= -(1LL << 32) && + (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); + return ADRImmBits((Value & 0x1fffff000ULL) >> 12); + + case AArch64::fixup_a64_ld64_gottprel_lo12_nc: + // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8 + // of X, with no overflow check. Check that X & 7 == 0. + case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: + // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of + // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0. 
+ case AArch64::fixup_a64_ld64_got_lo12_nc: + // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of + // G(S) with no overflow check. Check X & 7 == 0 + assert(((int64_t)Value & 7) == 0 && "Misaligned fixup"); + return (Value & 0xff8) << 7; + + case AArch64::fixup_a64_tlsdesc_call: + // R_AARCH64_TLSDESC_CALL: For relaxation only. + return 0; + } +} + +MCAsmBackend * +llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) { + Triple TheTriple(TT); + + return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS()); +} diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp new file mode 100644 index 0000000..4bcc65d --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -0,0 +1,292 @@ +//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file handles ELF-specific object emission, converting LLVM's internal +// fixups into the appropriate relocations. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { +class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { +public: + AArch64ELFObjectWriter(uint8_t OSABI); + + virtual ~AArch64ELFObjectWriter(); + +protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; +private: +}; +} + +AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, + /*HasRelocationAddend*/ true) +{} + +AArch64ELFObjectWriter::~AArch64ELFObjectWriter() +{} + +unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + unsigned Type; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_Data_8: + return ELF::R_AARCH64_PREL64; + case FK_Data_4: + return ELF::R_AARCH64_PREL32; + case FK_Data_2: + return ELF::R_AARCH64_PREL16; + case AArch64::fixup_a64_ld_prel: + Type = ELF::R_AARCH64_LD_PREL_LO19; + break; + case AArch64::fixup_a64_adr_prel: + Type = ELF::R_AARCH64_ADR_PREL_LO21; + break; + case AArch64::fixup_a64_adr_prel_page: + Type = ELF::R_AARCH64_ADR_PREL_PG_HI21; + break; + case AArch64::fixup_a64_adr_prel_got_page: + Type = ELF::R_AARCH64_ADR_GOT_PAGE; + break; + case AArch64::fixup_a64_tstbr: + Type = ELF::R_AARCH64_TSTBR14; + break; + case AArch64::fixup_a64_condbr: + Type = ELF::R_AARCH64_CONDBR19; + break; + case AArch64::fixup_a64_uncondbr: + Type = ELF::R_AARCH64_JUMP26; + break; + case AArch64::fixup_a64_call: + Type = ELF::R_AARCH64_CALL26; + break; + case AArch64::fixup_a64_adr_gottprel_page: + Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; + break; + case AArch64::fixup_a64_ld_gottprel_prel19: + Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; + break; + case 
AArch64::fixup_a64_tlsdesc_adr_page: + Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_Data_8: + return ELF::R_AARCH64_ABS64; + case FK_Data_4: + return ELF::R_AARCH64_ABS32; + case FK_Data_2: + return ELF::R_AARCH64_ABS16; + case AArch64::fixup_a64_add_lo12: + Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ld64_got_lo12_nc: + Type = ELF::R_AARCH64_LD64_GOT_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_lo12: + Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_lo12: + Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_lo12: + Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_lo12: + Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst128_lo12: + Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC; + break; + case AArch64::fixup_a64_movw_uabs_g0: + Type = ELF::R_AARCH64_MOVW_UABS_G0; + break; + case AArch64::fixup_a64_movw_uabs_g0_nc: + Type = ELF::R_AARCH64_MOVW_UABS_G0_NC; + break; + case AArch64::fixup_a64_movw_uabs_g1: + Type = ELF::R_AARCH64_MOVW_UABS_G1; + break; + case AArch64::fixup_a64_movw_uabs_g1_nc: + Type = ELF::R_AARCH64_MOVW_UABS_G1_NC; + break; + case AArch64::fixup_a64_movw_uabs_g2: + Type = ELF::R_AARCH64_MOVW_UABS_G2; + break; + case AArch64::fixup_a64_movw_uabs_g2_nc: + Type = ELF::R_AARCH64_MOVW_UABS_G2_NC; + break; + case AArch64::fixup_a64_movw_uabs_g3: + Type = ELF::R_AARCH64_MOVW_UABS_G3; + break; + case AArch64::fixup_a64_movw_sabs_g0: + Type = ELF::R_AARCH64_MOVW_SABS_G0; + break; + case AArch64::fixup_a64_movw_sabs_g1: + Type = ELF::R_AARCH64_MOVW_SABS_G1; + break; + case AArch64::fixup_a64_movw_sabs_g2: + Type = ELF::R_AARCH64_MOVW_SABS_G2; + break; + + // TLS Local-dynamic block + case AArch64::fixup_a64_movw_dtprel_g2: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; + break; + case AArch64::fixup_a64_movw_dtprel_g1: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; + break; + case AArch64::fixup_a64_movw_dtprel_g1_nc: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; + break; + case AArch64::fixup_a64_movw_dtprel_g0: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; + break; + case AArch64::fixup_a64_movw_dtprel_g0_nc: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; + break; + case AArch64::fixup_a64_add_dtprel_hi12: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12; + break; + case AArch64::fixup_a64_add_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; + break; + case AArch64::fixup_a64_add_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: + Type = 
ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; + break; + + // TLS initial-exec block + case AArch64::fixup_a64_movw_gottprel_g1: + Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; + break; + case AArch64::fixup_a64_movw_gottprel_g0_nc: + Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; + break; + case AArch64::fixup_a64_ld64_gottprel_lo12_nc: + Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; + break; + + // TLS local-exec block + case AArch64::fixup_a64_movw_tprel_g2: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; + break; + case AArch64::fixup_a64_movw_tprel_g1: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; + break; + case AArch64::fixup_a64_movw_tprel_g1_nc: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; + break; + case AArch64::fixup_a64_movw_tprel_g0: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; + break; + case AArch64::fixup_a64_movw_tprel_g0_nc: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; + break; + case AArch64::fixup_a64_add_tprel_hi12: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12; + break; + case AArch64::fixup_a64_add_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; + break; + case AArch64::fixup_a64_add_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst8_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst16_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst32_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst64_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; + break; + + // TLS general-dynamic block + case AArch64::fixup_a64_tlsdesc_adr_page: + Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; + break; + case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: + Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; + break; + case AArch64::fixup_a64_tlsdesc_add_lo12_nc: + Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; + break; + case AArch64::fixup_a64_tlsdesc_call: + Type = ELF::R_AARCH64_TLSDESC_CALL; + break; + } + } + + return Type; +} + +MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp new file mode 100644 index 0000000..b83577a --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -0,0 +1,160 @@ +//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits AArch64 ELF .o object files. 
Different +// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit +// regions of data and code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +/// Extend the generic ELFStreamer class so that it can emit mapping symbols at +/// the appropriate points in the object files. These symbols are defined in the +/// AArch64 ELF ABI: +/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf +/// +/// In brief: $x or $d should be emitted at the start of each contiguous region +/// of A64 code or data in a section. In practice, this emission does not rely +/// on explicit assembler directives but on inherent properties of the +/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an +/// instruction). +/// +/// As a result this system is orthogonal to the DataRegion infrastructure used +/// by MachO. Beware! +class AArch64ELFStreamer : public MCELFStreamer { +public: + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCELFStreamer(Context, TAB, OS, Emitter), + MappingSymbolCounter(0), LastEMS(EMS_None) { + } + + ~AArch64ELFStreamer() {} + + virtual void ChangeSection(const MCSection *Section) { + // We have to keep track of the mapping symbol state of any sections we + // use. Each one should start off as EMS_None, which is provided as the + // default constructor by DenseMap::lookup. + LastMappingSymbols[getPreviousSection()] = LastEMS; + LastEMS = LastMappingSymbols.lookup(Section); + + MCELFStreamer::ChangeSection(Section); + } + + /// This function is the one used to emit instruction data into the ELF + /// streamer. We override it to add the appropriate mapping symbol if + /// necessary. + virtual void EmitInstruction(const MCInst& Inst) { + EmitA64MappingSymbol(); + MCELFStreamer::EmitInstruction(Inst); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) + /// if necessary. + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitBytes(Data, AddrSpace); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) + /// if necessary. 
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + } + +private: + enum ElfMappingSymbol { + EMS_None, + EMS_A64, + EMS_Data + }; + + void EmitDataMappingSymbol() { + if (LastEMS == EMS_Data) return; + EmitMappingSymbol("$d"); + LastEMS = EMS_Data; + } + + void EmitA64MappingSymbol() { + if (LastEMS == EMS_A64) return; + EmitMappingSymbol("$x"); + LastEMS = EMS_A64; + } + + void EmitMappingSymbol(StringRef Name) { + MCSymbol *Start = getContext().CreateTempSymbol(); + EmitLabel(Start); + + MCSymbol *Symbol = + getContext().GetOrCreateSymbol(Name + "." + + Twine(MappingSymbolCounter++)); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + MCELF::SetType(SD, ELF::STT_NOTYPE); + MCELF::SetBinding(SD, ELF::STB_LOCAL); + SD.setExternal(false); + Symbol->setSection(*getCurrentSection()); + + const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + Symbol->setVariableValue(Value); + } + + int64_t MappingSymbolCounter; + + DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; + ElfMappingSymbol LastEMS; + + /// @} +}; +} + +namespace llvm { + MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) { + AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + if (NoExecStack) + S->getAssembler().setNoExecStack(true); + return S; + } +} + + diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h new file mode 100644 index 0000000..5a89ca5 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -0,0 +1,27 @@ +//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF streamer information for the AArch64 backend. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_ELF_STREAMER_H +#define LLVM_AARCH64_ELF_STREAMER_H + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + + MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack); +} + +#endif // AArch64_ELF_STREAMER_H diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h new file mode 100644 index 0000000..eeb122d --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -0,0 +1,113 @@ +//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the LLVM fixups applied to MCInsts in the AArch64 +// backend. 
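The streamer above boils down to a small per-section state machine: remember whether the last thing emitted in the current section was A64 code or data, and create a fresh "$x.N"/"$d.N" local symbol only when that state flips. A self-contained model of just that bookkeeping, where std::map and std::string stand in for MCSection and MCSymbol:

#include <cstdio>
#include <map>
#include <string>

enum ElfMappingSymbol { EMS_None, EMS_A64, EMS_Data };

struct MappingTracker {
  std::map<std::string, ElfMappingSymbol> LastPerSection;
  std::string CurSection;
  ElfMappingSymbol LastEMS = EMS_None;
  int Counter = 0;

  void changeSection(const std::string &NewSection) {
    LastPerSection[CurSection] = LastEMS; // save state of the old section
    LastEMS = LastPerSection[NewSection]; // EMS_None if never seen before
    CurSection = NewSection;
  }
  void emitInstruction() { switchTo(EMS_A64, "$x"); }
  void emitData()        { switchTo(EMS_Data, "$d"); }

private:
  void switchTo(ElfMappingSymbol State, const char *Name) {
    if (LastEMS == State)
      return;                               // still in the same kind of region
    std::printf("%s.%d\n", Name, Counter++); // e.g. "$x.0", then "$d.1"
    LastEMS = State;
  }
};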
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H +#define LLVM_AARCH64_AARCH64FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { + namespace AArch64 { + enum Fixups { + fixup_a64_ld_prel = FirstTargetFixupKind, + fixup_a64_adr_prel, + fixup_a64_adr_prel_page, + + fixup_a64_add_lo12, + + fixup_a64_ldst8_lo12, + fixup_a64_ldst16_lo12, + fixup_a64_ldst32_lo12, + fixup_a64_ldst64_lo12, + fixup_a64_ldst128_lo12, + + fixup_a64_tstbr, + fixup_a64_condbr, + fixup_a64_uncondbr, + fixup_a64_call, + + fixup_a64_movw_uabs_g0, + fixup_a64_movw_uabs_g0_nc, + fixup_a64_movw_uabs_g1, + fixup_a64_movw_uabs_g1_nc, + fixup_a64_movw_uabs_g2, + fixup_a64_movw_uabs_g2_nc, + fixup_a64_movw_uabs_g3, + + fixup_a64_movw_sabs_g0, + fixup_a64_movw_sabs_g1, + fixup_a64_movw_sabs_g2, + + fixup_a64_adr_prel_got_page, + fixup_a64_ld64_got_lo12_nc, + + // Produce offsets relative to the module's dynamic TLS area. + fixup_a64_movw_dtprel_g2, + fixup_a64_movw_dtprel_g1, + fixup_a64_movw_dtprel_g1_nc, + fixup_a64_movw_dtprel_g0, + fixup_a64_movw_dtprel_g0_nc, + fixup_a64_add_dtprel_hi12, + fixup_a64_add_dtprel_lo12, + fixup_a64_add_dtprel_lo12_nc, + fixup_a64_ldst8_dtprel_lo12, + fixup_a64_ldst8_dtprel_lo12_nc, + fixup_a64_ldst16_dtprel_lo12, + fixup_a64_ldst16_dtprel_lo12_nc, + fixup_a64_ldst32_dtprel_lo12, + fixup_a64_ldst32_dtprel_lo12_nc, + fixup_a64_ldst64_dtprel_lo12, + fixup_a64_ldst64_dtprel_lo12_nc, + + // Produce the GOT entry containing a variable's address in TLS's + // initial-exec mode. + fixup_a64_movw_gottprel_g1, + fixup_a64_movw_gottprel_g0_nc, + fixup_a64_adr_gottprel_page, + fixup_a64_ld64_gottprel_lo12_nc, + fixup_a64_ld_gottprel_prel19, + + // Produce offsets relative to the thread pointer: TPIDR_EL0. + fixup_a64_movw_tprel_g2, + fixup_a64_movw_tprel_g1, + fixup_a64_movw_tprel_g1_nc, + fixup_a64_movw_tprel_g0, + fixup_a64_movw_tprel_g0_nc, + fixup_a64_add_tprel_hi12, + fixup_a64_add_tprel_lo12, + fixup_a64_add_tprel_lo12_nc, + fixup_a64_ldst8_tprel_lo12, + fixup_a64_ldst8_tprel_lo12_nc, + fixup_a64_ldst16_tprel_lo12, + fixup_a64_ldst16_tprel_lo12_nc, + fixup_a64_ldst32_tprel_lo12, + fixup_a64_ldst32_tprel_lo12_nc, + fixup_a64_ldst64_tprel_lo12, + fixup_a64_ldst64_tprel_lo12_nc, + + // Produce the special fixups used by the general-dynamic TLS model. + fixup_a64_tlsdesc_adr_page, + fixup_a64_tlsdesc_ld64_lo12_nc, + fixup_a64_tlsdesc_add_lo12_nc, + fixup_a64_tlsdesc_call, + + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind + }; + } +} + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp new file mode 100644 index 0000000..8ec8cbf --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -0,0 +1,41 @@ +//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the AArch64MCAsmInfo properties. 
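Every enumerator above is attached to an instruction the same way: the code emitter records an MCFixup at offset 0 with the chosen kind and leaves it to the asm backend or the ELF object writer to resolve. A hedged one-line sketch, where Expr and Fixups are assumed to be the MCExpr* operand and the SmallVectorImpl<MCFixup>& being filled in, exactly as in getAddressWithFixup further down:

Fixups.push_back(MCFixup::Create(0, Expr,
                                 MCFixupKind(AArch64::fixup_a64_add_lo12)));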
+// +//===----------------------------------------------------------------------===// + +#include "AArch64MCAsmInfo.h" + +using namespace llvm; + +AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() { + PointerSize = 8; + + // ".comm align is in bytes but .align is pow-2." + AlignmentIsInBytes = false; + + CommentString = "//"; + PrivateGlobalPrefix = ".L"; + Code32Directive = ".code\t32"; + + Data16bitsDirective = "\t.hword\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = "\t.xword\t"; + + UseDataRegionDirectives = true; + + WeakRefDirective = "\t.weak\t"; + + HasLEB128 = true; + SupportsDebugInformation = true; + + // Exceptions handling + ExceptionsType = ExceptionHandling::DwarfCFI; +} diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h new file mode 100644 index 0000000..a20bc47 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -0,0 +1,27 @@ +//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the AArch64MCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64TARGETASMINFO_H +#define LLVM_AARCH64TARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + + struct AArch64ELFMCAsmInfo : public MCAsmInfo { + explicit AArch64ELFMCAsmInfo(); + }; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp new file mode 100644 index 0000000..a5c591e --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -0,0 +1,502 @@ +//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AArch64MCCodeEmitter class. 
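A small illustration of the AlignmentIsInBytes = false setting above: with pow-2 semantics the operand of ".align" is a log2, so the requested alignment is 1 << N bytes.

// Self-contained helper mirroring the pow-2 ".align" interpretation above.
static unsigned alignmentInBytes(unsigned AlignLog2) {
  return 1u << AlignLog2; // ".align 3" therefore means 8-byte alignment
}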
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class AArch64MCCodeEmitter : public MCCodeEmitter { + AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; + MCContext &Ctx; + +public: + AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {} + + ~AArch64MCCodeEmitter() {} + + unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + template<int MemSize> + unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + return getOffsetUImm12OpValue(MI, OpIdx, Fixups, MemSize); + } + + unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + int MemSize) const; + + unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + // Labels are handled mostly the same way: a symbol is needed, and + // just gets some fixup attached. + template<AArch64::Fixups fixupDesired> + unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + unsigned getAddressWithFixup(const MCOperand &MO, + unsigned FixupKind, + SmallVectorImpl<MCFixup> &Fixups) const; + + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; + + + void EmitByte(unsigned char C, raw_ostream &OS) const { + OS << (char)C; + } + + void EmitInstruction(uint32_t Val, raw_ostream &OS) const { + // Output the constant in little endian byte order. 
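    // For example, the A64 encoding 0x91000421 ("add x1, x1, #1") is written
    // as the byte sequence 21 04 00 91.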
+ for (unsigned i = 0; i != 4; ++i) { + EmitByte(Val & 0xff, OS); + Val >>= 8; + } + } + + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + + template<int hasRs, int hasRt2> unsigned + fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const; + + unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue) const; + + unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue) const; + + +}; + +} // end anonymous namespace + +unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO, + unsigned FixupKind, + SmallVectorImpl<MCFixup> &Fixups) const { + if (!MO.isExpr()) { + // This can occur for manually decoded or constructed MCInsts, but neither + // the assembly-parser nor instruction selection will currently produce an + // MCInst that's not a symbol reference. + assert(MO.isImm() && "Unexpected address requested"); + return MO.getImm(); + } + + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind = MCFixupKind(FixupKind); + Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + + return 0; +} + +unsigned AArch64MCCodeEmitter:: +getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + int MemSize) const { + const MCOperand &ImmOp = MI.getOperand(OpIdx); + if (ImmOp.isImm()) + return ImmOp.getImm(); + + assert(ImmOp.isExpr() && "Unexpected operand type"); + const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr()); + unsigned FixupKind; + + + switch (Expr->getKind()) { + default: llvm_unreachable("Unexpected operand modifier"); + case AArch64MCExpr::VK_AARCH64_LO12: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, + AArch64::fixup_a64_ldst16_lo12, + AArch64::fixup_a64_ldst32_lo12, + AArch64::fixup_a64_ldst64_lo12, + AArch64::fixup_a64_ldst128_lo12 }; + assert(MemSize <= 16 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_GOT_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc; + break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12, + AArch64::fixup_a64_ldst16_dtprel_lo12, + AArch64::fixup_a64_ldst32_dtprel_lo12, + AArch64::fixup_a64_ldst64_dtprel_lo12 }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc, + AArch64::fixup_a64_ldst16_dtprel_lo12_nc, + AArch64::fixup_a64_ldst32_dtprel_lo12_nc, + AArch64::fixup_a64_ldst64_dtprel_lo12_nc }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc; + break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{ + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12, + AArch64::fixup_a64_ldst16_tprel_lo12, + AArch64::fixup_a64_ldst32_tprel_lo12, + AArch64::fixup_a64_ldst64_tprel_lo12 }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc, + AArch64::fixup_a64_ldst16_tprel_lo12_nc, + AArch64::fixup_a64_ldst32_tprel_lo12_nc, + 
AArch64::fixup_a64_ldst64_tprel_lo12_nc }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc; + break; + } + + return getAddressWithFixup(ImmOp, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + assert(MO.isExpr()); + + unsigned FixupKind = 0; + switch(cast<AArch64MCExpr>(MO.getExpr())->getKind()) { + default: llvm_unreachable("Invalid expression modifier"); + case AArch64MCExpr::VK_AARCH64_LO12: + FixupKind = AArch64::fixup_a64_add_lo12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_HI12: + FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: + FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: + FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_HI12: + FixupKind = AArch64::fixup_a64_add_tprel_hi12; break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12: + FixupKind = AArch64::fixup_a64_add_tprel_lo12; break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: + FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break; + case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: + FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break; + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + assert(MO.isExpr()); + + unsigned Modifier = AArch64MCExpr::VK_AARCH64_None; + if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr())) + Modifier = Expr->getKind(); + + unsigned FixupKind = 0; + switch(Modifier) { + case AArch64MCExpr::VK_AARCH64_None: + FixupKind = AArch64::fixup_a64_adr_prel_page; + break; + case AArch64MCExpr::VK_AARCH64_GOT: + FixupKind = AArch64::fixup_a64_adr_prel_got_page; + break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL: + FixupKind = AArch64::fixup_a64_adr_gottprel_page; + break; + case AArch64MCExpr::VK_AARCH64_TLSDESC: + FixupKind = AArch64::fixup_a64_tlsdesc_adr_page; + break; + default: + llvm_unreachable("Unknown symbol reference kind for ADRP instruction"); + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Only immediate expected for shift"); + + return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6; +} + +unsigned +AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Only immediate expected for shift"); + + return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6; +} + + +template<AArch64::Fixups fixupDesired> unsigned +AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI, + unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = 
MI.getOperand(OpIdx); + + if (MO.isExpr()) + return getAddressWithFixup(MO, fixupDesired, Fixups); + + assert(MO.isImm()); + return MO.getImm(); +} + +unsigned +AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI, + unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + + if (MO.isImm()) + return MO.getImm(); + + assert(MO.isExpr()); + + unsigned FixupKind; + if (isa<AArch64MCExpr>(MO.getExpr())) { + assert(dyn_cast<AArch64MCExpr>(MO.getExpr())->getKind() + == AArch64MCExpr::VK_AARCH64_GOTTPREL + && "Invalid symbol modifier for literal load"); + FixupKind = AArch64::fixup_a64_ld_gottprel_prel19; + } else { + FixupKind = AArch64::fixup_a64_ld_prel; + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + + +unsigned +AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, + const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MO.isReg()) { + return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); + } else if (MO.isImm()) { + return static_cast<unsigned>(MO.getImm()); + } + + llvm_unreachable("Unable to encode MCOperand!"); + return 0; +} + +unsigned +AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &UImm16MO = MI.getOperand(OpIdx); + const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1); + + unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16; + + if (UImm16MO.isImm()) { + Result |= UImm16MO.getImm(); + return Result; + } + + const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr()); + AArch64::Fixups requestedFixup; + switch (A64E->getKind()) { + default: llvm_unreachable("unexpected expression modifier"); + case AArch64MCExpr::VK_AARCH64_ABS_G0: + requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break; + case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G1: + requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break; + case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G2: + requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break; + case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G3: + requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break; + case AArch64MCExpr::VK_AARCH64_SABS_G0: + requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break; + case AArch64MCExpr::VK_AARCH64_SABS_G1: + requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break; + case AArch64MCExpr::VK_AARCH64_SABS_G2: + requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break; + case 
AArch64MCExpr::VK_AARCH64_TPREL_G2: + requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break; + } + + return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups); +} + +template<int hasRs, int hasRt2> unsigned +AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, + unsigned EncodedValue) const { + if (!hasRs) EncodedValue |= 0x001F0000; + if (!hasRt2) EncodedValue |= 0x00007C00; + + return EncodedValue; +} + +unsigned +AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue) const { + // If one of the signed fixup kinds is applied to a MOVZ instruction, the + // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's + // job to ensure that any bits possibly affected by this are 0. This means we + // must zero out bit 30 (essentially emitting a MOVN). + MCOperand UImm16MO = MI.getOperand(1); + + // Nothing to do if there's no fixup. + if (UImm16MO.isImm()) + return EncodedValue; + + const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr()); + switch (A64E->getKind()) { + case AArch64MCExpr::VK_AARCH64_SABS_G0: + case AArch64MCExpr::VK_AARCH64_SABS_G1: + case AArch64MCExpr::VK_AARCH64_SABS_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + return EncodedValue & ~(1u << 30); + default: + // Nothing to do for an unsigned fixup. + return EncodedValue; + } + + llvm_unreachable("Should have returned by now"); +} + +unsigned +AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI, + unsigned EncodedValue) const { + // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 + // (i.e. all bits 1) but is ignored by the processor. + EncodedValue |= 0x1f << 10; + return EncodedValue; +} + +MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new AArch64MCCodeEmitter(Ctx); +} + +void AArch64MCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MI.getOpcode() == AArch64::TLSDESCCALL) { + // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the + // following (BLR) instruction. It doesn't emit any code itself so it + // doesn't go through the normal TableGenerated channels. 
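    // For reference, the canonical general-dynamic sequence this marker sits
    // in looks roughly like (var being the thread-local variable):
    //   adrp x0, :tlsdesc:var
    //   ldr  x1, [x0, #:tlsdesc_lo12:var]
    //   add  x0, x0, #:tlsdesc_lo12:var
    //   .tlsdesccall var
    //   blr  x1
    // so the fixup created below ends up attached to the following BLR.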
+ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call); + const MCExpr *Expr; + Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx); + Fixups.push_back(MCFixup::Create(0, Expr, Fixup)); + return; + } + + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); + + EmitInstruction(Binary, OS); +} + + +#include "AArch64GenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp new file mode 100644 index 0000000..c1abfe7 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -0,0 +1,178 @@ +//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the assembly expression modifiers +// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...). +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64mcexpr" +#include "AArch64MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELF.h" +#include "llvm/Object/ELF.h" + +using namespace llvm; + +const AArch64MCExpr* +AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) AArch64MCExpr(Kind, Expr); +} + +void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { + switch (Kind) { + default: llvm_unreachable("Invalid kind!"); + case VK_AARCH64_GOT: OS << ":got:"; break; + case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break; + case VK_AARCH64_LO12: OS << ":lo12:"; break; + case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break; + case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break; + case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break; + case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break; + case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break; + case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break; + case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break; + case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break; + case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break; + case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break; + case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break; + case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break; + case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break; + case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break; + case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break; + case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break; + case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break; + case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break; + case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break; + case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break; + case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break; + case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break; + case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break; + case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break; + case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break; + case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break; + case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break; + case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break; + case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break; + case 
VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break; + case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break; + case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break; + + } + + const MCExpr *Expr = getSubExpr(); + if (Expr->getKind() != MCExpr::SymbolRef) + OS << '('; + Expr->print(OS); + if (Expr->getKind() != MCExpr::SymbolRef) + OS << ')'; +} + +bool +AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const { + return getSubExpr()->EvaluateAsRelocatable(Res, *Layout); +} + +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { + switch (Expr->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expression"); + break; + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr); + fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); + fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: { + // We're known to be under a TLS fixup, so any symbol should be + // modified. There should be only one. + const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); + MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); + MCELF::SetType(SD, ELF::STT_TLS); + break; + } + + case MCExpr::Unary: + fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm); + break; + } +} + +void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + switch (getKind()) { + default: + return; + case VK_AARCH64_DTPREL_G2: + case VK_AARCH64_DTPREL_G1: + case VK_AARCH64_DTPREL_G1_NC: + case VK_AARCH64_DTPREL_G0: + case VK_AARCH64_DTPREL_G0_NC: + case VK_AARCH64_DTPREL_HI12: + case VK_AARCH64_DTPREL_LO12: + case VK_AARCH64_DTPREL_LO12_NC: + case VK_AARCH64_GOTTPREL_G1: + case VK_AARCH64_GOTTPREL_G0_NC: + case VK_AARCH64_GOTTPREL: + case VK_AARCH64_GOTTPREL_LO12: + case VK_AARCH64_TPREL_G2: + case VK_AARCH64_TPREL_G1: + case VK_AARCH64_TPREL_G1_NC: + case VK_AARCH64_TPREL_G0: + case VK_AARCH64_TPREL_G0_NC: + case VK_AARCH64_TPREL_HI12: + case VK_AARCH64_TPREL_LO12: + case VK_AARCH64_TPREL_LO12_NC: + case VK_AARCH64_TLSDESC: + case VK_AARCH64_TLSDESC_LO12: + break; + } + + fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); +} + +// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps +// that method should be made public? +// FIXME: really do above: now that two backends are using it. 
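Both fixELFSymbolsInTLSFixupsImpl above and the AddValueSymbolsImpl helper that follows have the same shape: a recursive walk over the MCExpr tree that acts on each symbol reference it finds. A self-contained model of that recursion, with a toy Expr type standing in for MCExpr (unary and target nodes omitted for brevity):

#include <memory>
#include <set>
#include <string>

// Toy expression node: a constant, a symbol reference, or a binary operation.
struct Expr {
  enum Kind { Constant, SymbolRef, Binary } K;
  std::string Symbol;             // valid when K == SymbolRef
  std::unique_ptr<Expr> LHS, RHS; // valid when K == Binary
};

// Collect every symbol referenced anywhere in the tree.
static void collectSymbols(const Expr *E, std::set<std::string> &Out) {
  switch (E->K) {
  case Expr::Constant:
    break;
  case Expr::SymbolRef:
    Out.insert(E->Symbol);
    break;
  case Expr::Binary:
    collectSymbols(E->LHS.get(), Out);
    collectSymbols(E->RHS.get(), Out);
    break;
  }
}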
+static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { + switch (Value->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expr!"); + break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); + AddValueSymbolsImpl(BE->getLHS(), Asm); + AddValueSymbolsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: + Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); + break; + } +} + +void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { + AddValueSymbolsImpl(getSubExpr(), Asm); +} diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h new file mode 100644 index 0000000..c0e3b29 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -0,0 +1,167 @@ +//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes AArch64-specific MCExprs, used for modifiers like +// ":lo12:" or ":gottprel_g1:". +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64MCEXPR_H +#define LLVM_AARCH64MCEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class AArch64MCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_AARCH64_None, + VK_AARCH64_GOT, // :got: modifier in assembly + VK_AARCH64_GOT_LO12, // :got_lo12: + VK_AARCH64_LO12, // :lo12: + + VK_AARCH64_ABS_G0, // :abs_g0: + VK_AARCH64_ABS_G0_NC, // :abs_g0_nc: + VK_AARCH64_ABS_G1, + VK_AARCH64_ABS_G1_NC, + VK_AARCH64_ABS_G2, + VK_AARCH64_ABS_G2_NC, + VK_AARCH64_ABS_G3, + + VK_AARCH64_SABS_G0, // :abs_g0_s: + VK_AARCH64_SABS_G1, + VK_AARCH64_SABS_G2, + + VK_AARCH64_DTPREL_G2, // :dtprel_g2: + VK_AARCH64_DTPREL_G1, + VK_AARCH64_DTPREL_G1_NC, + VK_AARCH64_DTPREL_G0, + VK_AARCH64_DTPREL_G0_NC, + VK_AARCH64_DTPREL_HI12, + VK_AARCH64_DTPREL_LO12, + VK_AARCH64_DTPREL_LO12_NC, + + VK_AARCH64_GOTTPREL_G1, // :gottprel: + VK_AARCH64_GOTTPREL_G0_NC, + VK_AARCH64_GOTTPREL, + VK_AARCH64_GOTTPREL_LO12, + + VK_AARCH64_TPREL_G2, // :tprel: + VK_AARCH64_TPREL_G1, + VK_AARCH64_TPREL_G1_NC, + VK_AARCH64_TPREL_G0, + VK_AARCH64_TPREL_G0_NC, + VK_AARCH64_TPREL_HI12, + VK_AARCH64_TPREL_LO12, + VK_AARCH64_TPREL_LO12_NC, + + VK_AARCH64_TLSDESC, // :tlsdesc: + VK_AARCH64_TLSDESC_LO12 + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr) + : Kind(_Kind), Expr(_Expr) {} + +public: + /// @name Construction + /// @{ + + static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_AARCH64_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_AARCH64_GOT, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_GOT_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr, + MCContext &Ctx) { + return 
Create(VK_AARCH64_DTPREL_G1, Expr, Ctx); + } + + static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_GOTTPREL, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TLSDESC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TPREL_G1, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx); + } + + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. + const MCExpr *getSubExpr() const { return Expr; } + + /// @} + + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const; + void AddValueSymbols(MCAssembler *) const; + const MCSection *FindAssociatedSection() const { + return getSubExpr()->FindAssociatedSection(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static bool classof(const AArch64MCExpr *) { return true; } + +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp new file mode 100644 index 0000000..7960db0 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -0,0 +1,194 @@ +//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AArch64 specific target descriptions. 
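A hedged usage sketch (not part of this patch) of the Create* helpers declared above; Sym and Ctx stand for an MCSymbol* and an MCContext& already in scope:

const MCExpr *SymRef = MCSymbolRefExpr::Create(Sym, Ctx);
const AArch64MCExpr *Lo12 = AArch64MCExpr::CreateLo12(SymRef, Ctx);
// Printing Lo12 yields ":lo12:" followed by the symbol name, per PrintImpl.

Keeping the modifier in a target MCExpr node, rather than folding it into the operand, is what lets the code emitter pick the matching fixup kind from the expression's VariantKind later on.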
+// +//===----------------------------------------------------------------------===// + +#include "AArch64MCTargetDesc.h" +#include "AArch64ELFStreamer.h" +#include "AArch64MCAsmInfo.h" +#include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/ADT/APInt.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" + +#define GET_REGINFO_MC_DESC +#include "AArch64GenRegisterInfo.inc" + +#define GET_INSTRINFO_MC_DESC +#include "AArch64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "AArch64GenSubtargetInfo.inc" + +using namespace llvm; + +MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT, + StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitAArch64MCSubtargetInfo(X, TT, CPU, ""); + return X; +} + + +static MCInstrInfo *createAArch64MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAArch64MCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAArch64MCRegisterInfo(X, AArch64::X30); + return X; +} + +static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + + MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(); + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) { + // On ELF platforms the default static relocation model has a smart enough + // linker to cope with referencing external symbols defined in a shared + // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. 
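    // (CodeModel::Default is similarly narrowed to Small just below; on
    // AArch64 the small model assumes code and statically allocated data are
    // reachable with adrp+add/ldr pairs, i.e. lie within a 4GiB range.)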
+ RM = Reloc::Static; + } + + if (CM == CodeModel::Default) + CM = CodeModel::Small; + + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + + +static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new AArch64InstPrinter(MAI, MII, MRI, STI); + return 0; +} + +namespace { + +class AArch64MCInstrAnalysis : public MCInstrAnalysis { +public: + AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + + virtual bool isUnconditionalBranch(const MCInst &Inst) const { + if (Inst.getOpcode() == AArch64::Bcc + && Inst.getOperand(0).getImm() == A64CC::AL) + return true; + return MCInstrAnalysis::isUnconditionalBranch(Inst); + } + + virtual bool isConditionalBranch(const MCInst &Inst) const { + if (Inst.getOpcode() == AArch64::Bcc + && Inst.getOperand(0).getImm() == A64CC::AL) + return false; + return MCInstrAnalysis::isConditionalBranch(Inst); + } + + uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size) const { + unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0; + // FIXME: We only handle PCRel branches for now. + if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType + != MCOI::OPERAND_PCREL) + return -1ULL; + + int64_t Imm = Inst.getOperand(LblOperand).getImm(); + + return Addr + Imm; + } +}; + +} + +static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) { + return new AArch64MCInstrAnalysis(Info); +} + + + +extern "C" void LLVMInitializeAArch64TargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn A(TheAArch64Target, createAArch64MCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheAArch64Target, + createAArch64MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheAArch64Target, + createAArch64MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheAArch64Target, + createAArch64MCRegisterInfo); + + // Register the MC subtarget info. + using AArch64_MC::createAArch64MCSubtargetInfo; + TargetRegistry::RegisterMCSubtargetInfo(TheAArch64Target, + createAArch64MCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(TheAArch64Target, + createAArch64MCInstrAnalysis); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheAArch64Target, + createAArch64MCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(TheAArch64Target, + createAArch64AsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheAArch64Target, + createMCStreamer); + + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(TheAArch64Target, + createAArch64MCInstPrinter); +} diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h new file mode 100644 index 0000000..3849fe3 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -0,0 +1,65 @@ +//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AArch64 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64MCTARGETDESC_H +#define LLVM_AARCH64MCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class StringRef; +class Target; +class raw_ostream; + +extern Target TheAArch64Target; + +namespace AArch64_MC { + MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS); +} + +MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, + uint8_t OSABI); + +MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT, + StringRef CPU); + +} // End llvm namespace + +// Defines symbolic names for AArch64 registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "AArch64GenRegisterInfo.inc" + +// Defines symbolic names for the AArch64 instructions. +// +#define GET_INSTRINFO_ENUM +#include "AArch64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "AArch64GenSubtargetInfo.inc" + +#endif diff --git a/contrib/llvm/lib/Target/AArch64/README.txt b/contrib/llvm/lib/Target/AArch64/README.txt new file mode 100644 index 0000000..601990f --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/README.txt @@ -0,0 +1,2 @@ +This file will contain changes that need to be made before AArch64 can become an +officially supported target. Currently a placeholder. diff --git a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp new file mode 100644 index 0000000..b8099cb --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -0,0 +1,24 @@ +//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the key registration step for the architecture. 
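A hedged usage sketch (not from this patch): once LLVMInitializeAArch64TargetInfo below and the MC registrations above have run, a generic tool can look the backend up by triple. Assumes the usual llvm/Support/TargetRegistry.h and ErrorHandling.h includes; the triple string is only an example.

std::string Error;
const Target *TheTarget =
    TargetRegistry::lookupTarget("aarch64-none-linux-gnu", Error);
if (!TheTarget)
  report_fatal_error("AArch64 target not registered: " + Error);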
+// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheAArch64Target; + +extern "C" void LLVMInitializeAArch64TargetInfo() { + RegisterTarget<Triple::aarch64> + X(TheAArch64Target, "aarch64", "AArch64"); +} diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp new file mode 100644 index 0000000..1678559 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -0,0 +1,1103 @@ +//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides basic encoding and assembly information for AArch64. +// +//===----------------------------------------------------------------------===// +#include "AArch64BaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Regex.h" + +using namespace llvm; + +StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const { + for (unsigned i = 0; i < NumPairs; ++i) { + if (Pairs[i].Value == Value) { + Valid = true; + return Pairs[i].Name; + } + } + + Valid = false; + return StringRef(); +} + +uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const { + std::string LowerCaseName = Name.lower(); + for (unsigned i = 0; i < NumPairs; ++i) { + if (Pairs[i].Name == LowerCaseName) { + Valid = true; + return Pairs[i].Value; + } + } + + Valid = false; + return -1; +} + +bool NamedImmMapper::validImm(uint32_t Value) const { + return Value < TooBigImm; +} + +const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = { + {"s1e1r", S1E1R}, + {"s1e2r", S1E2R}, + {"s1e3r", S1E3R}, + {"s1e1w", S1E1W}, + {"s1e2w", S1E2W}, + {"s1e3w", S1E3W}, + {"s1e0r", S1E0R}, + {"s1e0w", S1E0W}, + {"s12e1r", S12E1R}, + {"s12e1w", S12E1W}, + {"s12e0r", S12E0R}, + {"s12e0w", S12E0W}, +}; + +A64AT::ATMapper::ATMapper() + : NamedImmMapper(ATPairs, 0) {} + +const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = { + {"oshld", OSHLD}, + {"oshst", OSHST}, + {"osh", OSH}, + {"nshld", NSHLD}, + {"nshst", NSHST}, + {"nsh", NSH}, + {"ishld", ISHLD}, + {"ishst", ISHST}, + {"ish", ISH}, + {"ld", LD}, + {"st", ST}, + {"sy", SY} +}; + +A64DB::DBarrierMapper::DBarrierMapper() + : NamedImmMapper(DBarrierPairs, 16u) {} + +const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = { + {"zva", ZVA}, + {"ivac", IVAC}, + {"isw", ISW}, + {"cvac", CVAC}, + {"csw", CSW}, + {"cvau", CVAU}, + {"civac", CIVAC}, + {"cisw", CISW} +}; + +A64DC::DCMapper::DCMapper() + : NamedImmMapper(DCPairs, 0) {} + +const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = { + {"ialluis", IALLUIS}, + {"iallu", IALLU}, + {"ivau", IVAU} +}; + +A64IC::ICMapper::ICMapper() + : NamedImmMapper(ICPairs, 0) {} + +const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = { + {"sy", SY}, +}; + +A64ISB::ISBMapper::ISBMapper() + : NamedImmMapper(ISBPairs, 16) {} + +const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { + {"pldl1keep", PLDL1KEEP}, + {"pldl1strm", PLDL1STRM}, + {"pldl2keep", PLDL2KEEP}, + {"pldl2strm", PLDL2STRM}, + {"pldl3keep", PLDL3KEEP}, + {"pldl3strm", 
PLDL3STRM}, + {"plil1keep", PLIL1KEEP}, + {"plil1strm", PLIL1STRM}, + {"plil2keep", PLIL2KEEP}, + {"plil2strm", PLIL2STRM}, + {"plil3keep", PLIL3KEEP}, + {"plil3strm", PLIL3STRM}, + {"pstl1keep", PSTL1KEEP}, + {"pstl1strm", PSTL1STRM}, + {"pstl2keep", PSTL2KEEP}, + {"pstl2strm", PSTL2STRM}, + {"pstl3keep", PSTL3KEEP}, + {"pstl3strm", PSTL3STRM} +}; + +A64PRFM::PRFMMapper::PRFMMapper() + : NamedImmMapper(PRFMPairs, 32) {} + +const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = { + {"spsel", SPSel}, + {"daifset", DAIFSet}, + {"daifclr", DAIFClr} +}; + +A64PState::PStateMapper::PStateMapper() + : NamedImmMapper(PStatePairs, 0) {} + +const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { + {"mdccsr_el0", MDCCSR_EL0}, + {"dbgdtrrx_el0", DBGDTRRX_EL0}, + {"mdrar_el1", MDRAR_EL1}, + {"oslsr_el1", OSLSR_EL1}, + {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, + {"pmceid0_el0", PMCEID0_EL0}, + {"pmceid1_el0", PMCEID1_EL0}, + {"midr_el1", MIDR_EL1}, + {"ccsidr_el1", CCSIDR_EL1}, + {"clidr_el1", CLIDR_EL1}, + {"ctr_el0", CTR_EL0}, + {"mpidr_el1", MPIDR_EL1}, + {"revidr_el1", REVIDR_EL1}, + {"aidr_el1", AIDR_EL1}, + {"dczid_el0", DCZID_EL0}, + {"id_pfr0_el1", ID_PFR0_EL1}, + {"id_pfr1_el1", ID_PFR1_EL1}, + {"id_dfr0_el1", ID_DFR0_EL1}, + {"id_afr0_el1", ID_AFR0_EL1}, + {"id_mmfr0_el1", ID_MMFR0_EL1}, + {"id_mmfr1_el1", ID_MMFR1_EL1}, + {"id_mmfr2_el1", ID_MMFR2_EL1}, + {"id_mmfr3_el1", ID_MMFR3_EL1}, + {"id_isar0_el1", ID_ISAR0_EL1}, + {"id_isar1_el1", ID_ISAR1_EL1}, + {"id_isar2_el1", ID_ISAR2_EL1}, + {"id_isar3_el1", ID_ISAR3_EL1}, + {"id_isar4_el1", ID_ISAR4_EL1}, + {"id_isar5_el1", ID_ISAR5_EL1}, + {"id_aa64pfr0_el1", ID_AA64PFR0_EL1}, + {"id_aa64pfr1_el1", ID_AA64PFR1_EL1}, + {"id_aa64dfr0_el1", ID_AA64DFR0_EL1}, + {"id_aa64dfr1_el1", ID_AA64DFR1_EL1}, + {"id_aa64afr0_el1", ID_AA64AFR0_EL1}, + {"id_aa64afr1_el1", ID_AA64AFR1_EL1}, + {"id_aa64isar0_el1", ID_AA64ISAR0_EL1}, + {"id_aa64isar1_el1", ID_AA64ISAR1_EL1}, + {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1}, + {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1}, + {"mvfr0_el1", MVFR0_EL1}, + {"mvfr1_el1", MVFR1_EL1}, + {"mvfr2_el1", MVFR2_EL1}, + {"rvbar_el1", RVBAR_EL1}, + {"rvbar_el2", RVBAR_EL2}, + {"rvbar_el3", RVBAR_EL3}, + {"isr_el1", ISR_EL1}, + {"cntpct_el0", CNTPCT_EL0}, + {"cntvct_el0", CNTVCT_EL0}, + + // Trace registers + {"trcstatr", TRCSTATR}, + {"trcidr8", TRCIDR8}, + {"trcidr9", TRCIDR9}, + {"trcidr10", TRCIDR10}, + {"trcidr11", TRCIDR11}, + {"trcidr12", TRCIDR12}, + {"trcidr13", TRCIDR13}, + {"trcidr0", TRCIDR0}, + {"trcidr1", TRCIDR1}, + {"trcidr2", TRCIDR2}, + {"trcidr3", TRCIDR3}, + {"trcidr4", TRCIDR4}, + {"trcidr5", TRCIDR5}, + {"trcidr6", TRCIDR6}, + {"trcidr7", TRCIDR7}, + {"trcoslsr", TRCOSLSR}, + {"trcpdsr", TRCPDSR}, + {"trcdevaff0", TRCDEVAFF0}, + {"trcdevaff1", TRCDEVAFF1}, + {"trclsr", TRCLSR}, + {"trcauthstatus", TRCAUTHSTATUS}, + {"trcdevarch", TRCDEVARCH}, + {"trcdevid", TRCDEVID}, + {"trcdevtype", TRCDEVTYPE}, + {"trcpidr4", TRCPIDR4}, + {"trcpidr5", TRCPIDR5}, + {"trcpidr6", TRCPIDR6}, + {"trcpidr7", TRCPIDR7}, + {"trcpidr0", TRCPIDR0}, + {"trcpidr1", TRCPIDR1}, + {"trcpidr2", TRCPIDR2}, + {"trcpidr3", TRCPIDR3}, + {"trccidr0", TRCCIDR0}, + {"trccidr1", TRCCIDR1}, + {"trccidr2", TRCCIDR2}, + {"trccidr3", TRCCIDR3}, + + // GICv3 registers + {"icc_iar1_el1", ICC_IAR1_EL1}, + {"icc_iar0_el1", ICC_IAR0_EL1}, + {"icc_hppir1_el1", ICC_HPPIR1_EL1}, + {"icc_hppir0_el1", ICC_HPPIR0_EL1}, + {"icc_rpr_el1", ICC_RPR_EL1}, + {"ich_vtr_el2", ICH_VTR_EL2}, + {"ich_eisr_el2", ICH_EISR_EL2}, + {"ich_elsr_el2", 
ICH_ELSR_EL2} +}; + +A64SysReg::MRSMapper::MRSMapper() { + InstPairs = &MRSPairs[0]; + NumInstPairs = llvm::array_lengthof(MRSPairs); +} + +const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { + {"dbgdtrtx_el0", DBGDTRTX_EL0}, + {"oslar_el1", OSLAR_EL1}, + {"pmswinc_el0", PMSWINC_EL0}, + + // Trace registers + {"trcoslar", TRCOSLAR}, + {"trclar", TRCLAR}, + + // GICv3 registers + {"icc_eoir1_el1", ICC_EOIR1_EL1}, + {"icc_eoir0_el1", ICC_EOIR0_EL1}, + {"icc_dir_el1", ICC_DIR_EL1}, + {"icc_sgi1r_el1", ICC_SGI1R_EL1}, + {"icc_asgi1r_el1", ICC_ASGI1R_EL1}, + {"icc_sgi0r_el1", ICC_SGI0R_EL1} +}; + +A64SysReg::MSRMapper::MSRMapper() { + InstPairs = &MSRPairs[0]; + NumInstPairs = llvm::array_lengthof(MSRPairs); +} + + +const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { + {"osdtrrx_el1", OSDTRRX_EL1}, + {"osdtrtx_el1", OSDTRTX_EL1}, + {"teecr32_el1", TEECR32_EL1}, + {"mdccint_el1", MDCCINT_EL1}, + {"mdscr_el1", MDSCR_EL1}, + {"dbgdtr_el0", DBGDTR_EL0}, + {"oseccr_el1", OSECCR_EL1}, + {"dbgvcr32_el2", DBGVCR32_EL2}, + {"dbgbvr0_el1", DBGBVR0_EL1}, + {"dbgbvr1_el1", DBGBVR1_EL1}, + {"dbgbvr2_el1", DBGBVR2_EL1}, + {"dbgbvr3_el1", DBGBVR3_EL1}, + {"dbgbvr4_el1", DBGBVR4_EL1}, + {"dbgbvr5_el1", DBGBVR5_EL1}, + {"dbgbvr6_el1", DBGBVR6_EL1}, + {"dbgbvr7_el1", DBGBVR7_EL1}, + {"dbgbvr8_el1", DBGBVR8_EL1}, + {"dbgbvr9_el1", DBGBVR9_EL1}, + {"dbgbvr10_el1", DBGBVR10_EL1}, + {"dbgbvr11_el1", DBGBVR11_EL1}, + {"dbgbvr12_el1", DBGBVR12_EL1}, + {"dbgbvr13_el1", DBGBVR13_EL1}, + {"dbgbvr14_el1", DBGBVR14_EL1}, + {"dbgbvr15_el1", DBGBVR15_EL1}, + {"dbgbcr0_el1", DBGBCR0_EL1}, + {"dbgbcr1_el1", DBGBCR1_EL1}, + {"dbgbcr2_el1", DBGBCR2_EL1}, + {"dbgbcr3_el1", DBGBCR3_EL1}, + {"dbgbcr4_el1", DBGBCR4_EL1}, + {"dbgbcr5_el1", DBGBCR5_EL1}, + {"dbgbcr6_el1", DBGBCR6_EL1}, + {"dbgbcr7_el1", DBGBCR7_EL1}, + {"dbgbcr8_el1", DBGBCR8_EL1}, + {"dbgbcr9_el1", DBGBCR9_EL1}, + {"dbgbcr10_el1", DBGBCR10_EL1}, + {"dbgbcr11_el1", DBGBCR11_EL1}, + {"dbgbcr12_el1", DBGBCR12_EL1}, + {"dbgbcr13_el1", DBGBCR13_EL1}, + {"dbgbcr14_el1", DBGBCR14_EL1}, + {"dbgbcr15_el1", DBGBCR15_EL1}, + {"dbgwvr0_el1", DBGWVR0_EL1}, + {"dbgwvr1_el1", DBGWVR1_EL1}, + {"dbgwvr2_el1", DBGWVR2_EL1}, + {"dbgwvr3_el1", DBGWVR3_EL1}, + {"dbgwvr4_el1", DBGWVR4_EL1}, + {"dbgwvr5_el1", DBGWVR5_EL1}, + {"dbgwvr6_el1", DBGWVR6_EL1}, + {"dbgwvr7_el1", DBGWVR7_EL1}, + {"dbgwvr8_el1", DBGWVR8_EL1}, + {"dbgwvr9_el1", DBGWVR9_EL1}, + {"dbgwvr10_el1", DBGWVR10_EL1}, + {"dbgwvr11_el1", DBGWVR11_EL1}, + {"dbgwvr12_el1", DBGWVR12_EL1}, + {"dbgwvr13_el1", DBGWVR13_EL1}, + {"dbgwvr14_el1", DBGWVR14_EL1}, + {"dbgwvr15_el1", DBGWVR15_EL1}, + {"dbgwcr0_el1", DBGWCR0_EL1}, + {"dbgwcr1_el1", DBGWCR1_EL1}, + {"dbgwcr2_el1", DBGWCR2_EL1}, + {"dbgwcr3_el1", DBGWCR3_EL1}, + {"dbgwcr4_el1", DBGWCR4_EL1}, + {"dbgwcr5_el1", DBGWCR5_EL1}, + {"dbgwcr6_el1", DBGWCR6_EL1}, + {"dbgwcr7_el1", DBGWCR7_EL1}, + {"dbgwcr8_el1", DBGWCR8_EL1}, + {"dbgwcr9_el1", DBGWCR9_EL1}, + {"dbgwcr10_el1", DBGWCR10_EL1}, + {"dbgwcr11_el1", DBGWCR11_EL1}, + {"dbgwcr12_el1", DBGWCR12_EL1}, + {"dbgwcr13_el1", DBGWCR13_EL1}, + {"dbgwcr14_el1", DBGWCR14_EL1}, + {"dbgwcr15_el1", DBGWCR15_EL1}, + {"teehbr32_el1", TEEHBR32_EL1}, + {"osdlr_el1", OSDLR_EL1}, + {"dbgprcr_el1", DBGPRCR_EL1}, + {"dbgclaimset_el1", DBGCLAIMSET_EL1}, + {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, + {"csselr_el1", CSSELR_EL1}, + {"vpidr_el2", VPIDR_EL2}, + {"vmpidr_el2", VMPIDR_EL2}, + {"sctlr_el1", SCTLR_EL1}, + {"sctlr_el2", SCTLR_EL2}, + {"sctlr_el3", SCTLR_EL3}, + {"actlr_el1", ACTLR_EL1}, + {"actlr_el2", 
ACTLR_EL2}, + {"actlr_el3", ACTLR_EL3}, + {"cpacr_el1", CPACR_EL1}, + {"hcr_el2", HCR_EL2}, + {"scr_el3", SCR_EL3}, + {"mdcr_el2", MDCR_EL2}, + {"sder32_el3", SDER32_EL3}, + {"cptr_el2", CPTR_EL2}, + {"cptr_el3", CPTR_EL3}, + {"hstr_el2", HSTR_EL2}, + {"hacr_el2", HACR_EL2}, + {"mdcr_el3", MDCR_EL3}, + {"ttbr0_el1", TTBR0_EL1}, + {"ttbr0_el2", TTBR0_EL2}, + {"ttbr0_el3", TTBR0_EL3}, + {"ttbr1_el1", TTBR1_EL1}, + {"tcr_el1", TCR_EL1}, + {"tcr_el2", TCR_EL2}, + {"tcr_el3", TCR_EL3}, + {"vttbr_el2", VTTBR_EL2}, + {"vtcr_el2", VTCR_EL2}, + {"dacr32_el2", DACR32_EL2}, + {"spsr_el1", SPSR_EL1}, + {"spsr_el2", SPSR_EL2}, + {"spsr_el3", SPSR_EL3}, + {"elr_el1", ELR_EL1}, + {"elr_el2", ELR_EL2}, + {"elr_el3", ELR_EL3}, + {"sp_el0", SP_EL0}, + {"sp_el1", SP_EL1}, + {"sp_el2", SP_EL2}, + {"spsel", SPSel}, + {"nzcv", NZCV}, + {"daif", DAIF}, + {"currentel", CurrentEL}, + {"spsr_irq", SPSR_irq}, + {"spsr_abt", SPSR_abt}, + {"spsr_und", SPSR_und}, + {"spsr_fiq", SPSR_fiq}, + {"fpcr", FPCR}, + {"fpsr", FPSR}, + {"dspsr_el0", DSPSR_EL0}, + {"dlr_el0", DLR_EL0}, + {"ifsr32_el2", IFSR32_EL2}, + {"afsr0_el1", AFSR0_EL1}, + {"afsr0_el2", AFSR0_EL2}, + {"afsr0_el3", AFSR0_EL3}, + {"afsr1_el1", AFSR1_EL1}, + {"afsr1_el2", AFSR1_EL2}, + {"afsr1_el3", AFSR1_EL3}, + {"esr_el1", ESR_EL1}, + {"esr_el2", ESR_EL2}, + {"esr_el3", ESR_EL3}, + {"fpexc32_el2", FPEXC32_EL2}, + {"far_el1", FAR_EL1}, + {"far_el2", FAR_EL2}, + {"far_el3", FAR_EL3}, + {"hpfar_el2", HPFAR_EL2}, + {"par_el1", PAR_EL1}, + {"pmcr_el0", PMCR_EL0}, + {"pmcntenset_el0", PMCNTENSET_EL0}, + {"pmcntenclr_el0", PMCNTENCLR_EL0}, + {"pmovsclr_el0", PMOVSCLR_EL0}, + {"pmselr_el0", PMSELR_EL0}, + {"pmccntr_el0", PMCCNTR_EL0}, + {"pmxevtyper_el0", PMXEVTYPER_EL0}, + {"pmxevcntr_el0", PMXEVCNTR_EL0}, + {"pmuserenr_el0", PMUSERENR_EL0}, + {"pmintenset_el1", PMINTENSET_EL1}, + {"pmintenclr_el1", PMINTENCLR_EL1}, + {"pmovsset_el0", PMOVSSET_EL0}, + {"mair_el1", MAIR_EL1}, + {"mair_el2", MAIR_EL2}, + {"mair_el3", MAIR_EL3}, + {"amair_el1", AMAIR_EL1}, + {"amair_el2", AMAIR_EL2}, + {"amair_el3", AMAIR_EL3}, + {"vbar_el1", VBAR_EL1}, + {"vbar_el2", VBAR_EL2}, + {"vbar_el3", VBAR_EL3}, + {"rmr_el1", RMR_EL1}, + {"rmr_el2", RMR_EL2}, + {"rmr_el3", RMR_EL3}, + {"contextidr_el1", CONTEXTIDR_EL1}, + {"tpidr_el0", TPIDR_EL0}, + {"tpidr_el2", TPIDR_EL2}, + {"tpidr_el3", TPIDR_EL3}, + {"tpidrro_el0", TPIDRRO_EL0}, + {"tpidr_el1", TPIDR_EL1}, + {"cntfrq_el0", CNTFRQ_EL0}, + {"cntvoff_el2", CNTVOFF_EL2}, + {"cntkctl_el1", CNTKCTL_EL1}, + {"cnthctl_el2", CNTHCTL_EL2}, + {"cntp_tval_el0", CNTP_TVAL_EL0}, + {"cnthp_tval_el2", CNTHP_TVAL_EL2}, + {"cntps_tval_el1", CNTPS_TVAL_EL1}, + {"cntp_ctl_el0", CNTP_CTL_EL0}, + {"cnthp_ctl_el2", CNTHP_CTL_EL2}, + {"cntps_ctl_el1", CNTPS_CTL_EL1}, + {"cntp_cval_el0", CNTP_CVAL_EL0}, + {"cnthp_cval_el2", CNTHP_CVAL_EL2}, + {"cntps_cval_el1", CNTPS_CVAL_EL1}, + {"cntv_tval_el0", CNTV_TVAL_EL0}, + {"cntv_ctl_el0", CNTV_CTL_EL0}, + {"cntv_cval_el0", CNTV_CVAL_EL0}, + {"pmevcntr0_el0", PMEVCNTR0_EL0}, + {"pmevcntr1_el0", PMEVCNTR1_EL0}, + {"pmevcntr2_el0", PMEVCNTR2_EL0}, + {"pmevcntr3_el0", PMEVCNTR3_EL0}, + {"pmevcntr4_el0", PMEVCNTR4_EL0}, + {"pmevcntr5_el0", PMEVCNTR5_EL0}, + {"pmevcntr6_el0", PMEVCNTR6_EL0}, + {"pmevcntr7_el0", PMEVCNTR7_EL0}, + {"pmevcntr8_el0", PMEVCNTR8_EL0}, + {"pmevcntr9_el0", PMEVCNTR9_EL0}, + {"pmevcntr10_el0", PMEVCNTR10_EL0}, + {"pmevcntr11_el0", PMEVCNTR11_EL0}, + {"pmevcntr12_el0", PMEVCNTR12_EL0}, + {"pmevcntr13_el0", PMEVCNTR13_EL0}, + {"pmevcntr14_el0", PMEVCNTR14_EL0}, + {"pmevcntr15_el0", PMEVCNTR15_EL0}, + 
{"pmevcntr16_el0", PMEVCNTR16_EL0}, + {"pmevcntr17_el0", PMEVCNTR17_EL0}, + {"pmevcntr18_el0", PMEVCNTR18_EL0}, + {"pmevcntr19_el0", PMEVCNTR19_EL0}, + {"pmevcntr20_el0", PMEVCNTR20_EL0}, + {"pmevcntr21_el0", PMEVCNTR21_EL0}, + {"pmevcntr22_el0", PMEVCNTR22_EL0}, + {"pmevcntr23_el0", PMEVCNTR23_EL0}, + {"pmevcntr24_el0", PMEVCNTR24_EL0}, + {"pmevcntr25_el0", PMEVCNTR25_EL0}, + {"pmevcntr26_el0", PMEVCNTR26_EL0}, + {"pmevcntr27_el0", PMEVCNTR27_EL0}, + {"pmevcntr28_el0", PMEVCNTR28_EL0}, + {"pmevcntr29_el0", PMEVCNTR29_EL0}, + {"pmevcntr30_el0", PMEVCNTR30_EL0}, + {"pmccfiltr_el0", PMCCFILTR_EL0}, + {"pmevtyper0_el0", PMEVTYPER0_EL0}, + {"pmevtyper1_el0", PMEVTYPER1_EL0}, + {"pmevtyper2_el0", PMEVTYPER2_EL0}, + {"pmevtyper3_el0", PMEVTYPER3_EL0}, + {"pmevtyper4_el0", PMEVTYPER4_EL0}, + {"pmevtyper5_el0", PMEVTYPER5_EL0}, + {"pmevtyper6_el0", PMEVTYPER6_EL0}, + {"pmevtyper7_el0", PMEVTYPER7_EL0}, + {"pmevtyper8_el0", PMEVTYPER8_EL0}, + {"pmevtyper9_el0", PMEVTYPER9_EL0}, + {"pmevtyper10_el0", PMEVTYPER10_EL0}, + {"pmevtyper11_el0", PMEVTYPER11_EL0}, + {"pmevtyper12_el0", PMEVTYPER12_EL0}, + {"pmevtyper13_el0", PMEVTYPER13_EL0}, + {"pmevtyper14_el0", PMEVTYPER14_EL0}, + {"pmevtyper15_el0", PMEVTYPER15_EL0}, + {"pmevtyper16_el0", PMEVTYPER16_EL0}, + {"pmevtyper17_el0", PMEVTYPER17_EL0}, + {"pmevtyper18_el0", PMEVTYPER18_EL0}, + {"pmevtyper19_el0", PMEVTYPER19_EL0}, + {"pmevtyper20_el0", PMEVTYPER20_EL0}, + {"pmevtyper21_el0", PMEVTYPER21_EL0}, + {"pmevtyper22_el0", PMEVTYPER22_EL0}, + {"pmevtyper23_el0", PMEVTYPER23_EL0}, + {"pmevtyper24_el0", PMEVTYPER24_EL0}, + {"pmevtyper25_el0", PMEVTYPER25_EL0}, + {"pmevtyper26_el0", PMEVTYPER26_EL0}, + {"pmevtyper27_el0", PMEVTYPER27_EL0}, + {"pmevtyper28_el0", PMEVTYPER28_EL0}, + {"pmevtyper29_el0", PMEVTYPER29_EL0}, + {"pmevtyper30_el0", PMEVTYPER30_EL0}, + + // Trace registers + {"trcprgctlr", TRCPRGCTLR}, + {"trcprocselr", TRCPROCSELR}, + {"trcconfigr", TRCCONFIGR}, + {"trcauxctlr", TRCAUXCTLR}, + {"trceventctl0r", TRCEVENTCTL0R}, + {"trceventctl1r", TRCEVENTCTL1R}, + {"trcstallctlr", TRCSTALLCTLR}, + {"trctsctlr", TRCTSCTLR}, + {"trcsyncpr", TRCSYNCPR}, + {"trcccctlr", TRCCCCTLR}, + {"trcbbctlr", TRCBBCTLR}, + {"trctraceidr", TRCTRACEIDR}, + {"trcqctlr", TRCQCTLR}, + {"trcvictlr", TRCVICTLR}, + {"trcviiectlr", TRCVIIECTLR}, + {"trcvissctlr", TRCVISSCTLR}, + {"trcvipcssctlr", TRCVIPCSSCTLR}, + {"trcvdctlr", TRCVDCTLR}, + {"trcvdsacctlr", TRCVDSACCTLR}, + {"trcvdarcctlr", TRCVDARCCTLR}, + {"trcseqevr0", TRCSEQEVR0}, + {"trcseqevr1", TRCSEQEVR1}, + {"trcseqevr2", TRCSEQEVR2}, + {"trcseqrstevr", TRCSEQRSTEVR}, + {"trcseqstr", TRCSEQSTR}, + {"trcextinselr", TRCEXTINSELR}, + {"trccntrldvr0", TRCCNTRLDVR0}, + {"trccntrldvr1", TRCCNTRLDVR1}, + {"trccntrldvr2", TRCCNTRLDVR2}, + {"trccntrldvr3", TRCCNTRLDVR3}, + {"trccntctlr0", TRCCNTCTLR0}, + {"trccntctlr1", TRCCNTCTLR1}, + {"trccntctlr2", TRCCNTCTLR2}, + {"trccntctlr3", TRCCNTCTLR3}, + {"trccntvr0", TRCCNTVR0}, + {"trccntvr1", TRCCNTVR1}, + {"trccntvr2", TRCCNTVR2}, + {"trccntvr3", TRCCNTVR3}, + {"trcimspec0", TRCIMSPEC0}, + {"trcimspec1", TRCIMSPEC1}, + {"trcimspec2", TRCIMSPEC2}, + {"trcimspec3", TRCIMSPEC3}, + {"trcimspec4", TRCIMSPEC4}, + {"trcimspec5", TRCIMSPEC5}, + {"trcimspec6", TRCIMSPEC6}, + {"trcimspec7", TRCIMSPEC7}, + {"trcrsctlr2", TRCRSCTLR2}, + {"trcrsctlr3", TRCRSCTLR3}, + {"trcrsctlr4", TRCRSCTLR4}, + {"trcrsctlr5", TRCRSCTLR5}, + {"trcrsctlr6", TRCRSCTLR6}, + {"trcrsctlr7", TRCRSCTLR7}, + {"trcrsctlr8", TRCRSCTLR8}, + {"trcrsctlr9", TRCRSCTLR9}, + {"trcrsctlr10", TRCRSCTLR10}, + 
{"trcrsctlr11", TRCRSCTLR11}, + {"trcrsctlr12", TRCRSCTLR12}, + {"trcrsctlr13", TRCRSCTLR13}, + {"trcrsctlr14", TRCRSCTLR14}, + {"trcrsctlr15", TRCRSCTLR15}, + {"trcrsctlr16", TRCRSCTLR16}, + {"trcrsctlr17", TRCRSCTLR17}, + {"trcrsctlr18", TRCRSCTLR18}, + {"trcrsctlr19", TRCRSCTLR19}, + {"trcrsctlr20", TRCRSCTLR20}, + {"trcrsctlr21", TRCRSCTLR21}, + {"trcrsctlr22", TRCRSCTLR22}, + {"trcrsctlr23", TRCRSCTLR23}, + {"trcrsctlr24", TRCRSCTLR24}, + {"trcrsctlr25", TRCRSCTLR25}, + {"trcrsctlr26", TRCRSCTLR26}, + {"trcrsctlr27", TRCRSCTLR27}, + {"trcrsctlr28", TRCRSCTLR28}, + {"trcrsctlr29", TRCRSCTLR29}, + {"trcrsctlr30", TRCRSCTLR30}, + {"trcrsctlr31", TRCRSCTLR31}, + {"trcssccr0", TRCSSCCR0}, + {"trcssccr1", TRCSSCCR1}, + {"trcssccr2", TRCSSCCR2}, + {"trcssccr3", TRCSSCCR3}, + {"trcssccr4", TRCSSCCR4}, + {"trcssccr5", TRCSSCCR5}, + {"trcssccr6", TRCSSCCR6}, + {"trcssccr7", TRCSSCCR7}, + {"trcsscsr0", TRCSSCSR0}, + {"trcsscsr1", TRCSSCSR1}, + {"trcsscsr2", TRCSSCSR2}, + {"trcsscsr3", TRCSSCSR3}, + {"trcsscsr4", TRCSSCSR4}, + {"trcsscsr5", TRCSSCSR5}, + {"trcsscsr6", TRCSSCSR6}, + {"trcsscsr7", TRCSSCSR7}, + {"trcsspcicr0", TRCSSPCICR0}, + {"trcsspcicr1", TRCSSPCICR1}, + {"trcsspcicr2", TRCSSPCICR2}, + {"trcsspcicr3", TRCSSPCICR3}, + {"trcsspcicr4", TRCSSPCICR4}, + {"trcsspcicr5", TRCSSPCICR5}, + {"trcsspcicr6", TRCSSPCICR6}, + {"trcsspcicr7", TRCSSPCICR7}, + {"trcpdcr", TRCPDCR}, + {"trcacvr0", TRCACVR0}, + {"trcacvr1", TRCACVR1}, + {"trcacvr2", TRCACVR2}, + {"trcacvr3", TRCACVR3}, + {"trcacvr4", TRCACVR4}, + {"trcacvr5", TRCACVR5}, + {"trcacvr6", TRCACVR6}, + {"trcacvr7", TRCACVR7}, + {"trcacvr8", TRCACVR8}, + {"trcacvr9", TRCACVR9}, + {"trcacvr10", TRCACVR10}, + {"trcacvr11", TRCACVR11}, + {"trcacvr12", TRCACVR12}, + {"trcacvr13", TRCACVR13}, + {"trcacvr14", TRCACVR14}, + {"trcacvr15", TRCACVR15}, + {"trcacatr0", TRCACATR0}, + {"trcacatr1", TRCACATR1}, + {"trcacatr2", TRCACATR2}, + {"trcacatr3", TRCACATR3}, + {"trcacatr4", TRCACATR4}, + {"trcacatr5", TRCACATR5}, + {"trcacatr6", TRCACATR6}, + {"trcacatr7", TRCACATR7}, + {"trcacatr8", TRCACATR8}, + {"trcacatr9", TRCACATR9}, + {"trcacatr10", TRCACATR10}, + {"trcacatr11", TRCACATR11}, + {"trcacatr12", TRCACATR12}, + {"trcacatr13", TRCACATR13}, + {"trcacatr14", TRCACATR14}, + {"trcacatr15", TRCACATR15}, + {"trcdvcvr0", TRCDVCVR0}, + {"trcdvcvr1", TRCDVCVR1}, + {"trcdvcvr2", TRCDVCVR2}, + {"trcdvcvr3", TRCDVCVR3}, + {"trcdvcvr4", TRCDVCVR4}, + {"trcdvcvr5", TRCDVCVR5}, + {"trcdvcvr6", TRCDVCVR6}, + {"trcdvcvr7", TRCDVCVR7}, + {"trcdvcmr0", TRCDVCMR0}, + {"trcdvcmr1", TRCDVCMR1}, + {"trcdvcmr2", TRCDVCMR2}, + {"trcdvcmr3", TRCDVCMR3}, + {"trcdvcmr4", TRCDVCMR4}, + {"trcdvcmr5", TRCDVCMR5}, + {"trcdvcmr6", TRCDVCMR6}, + {"trcdvcmr7", TRCDVCMR7}, + {"trccidcvr0", TRCCIDCVR0}, + {"trccidcvr1", TRCCIDCVR1}, + {"trccidcvr2", TRCCIDCVR2}, + {"trccidcvr3", TRCCIDCVR3}, + {"trccidcvr4", TRCCIDCVR4}, + {"trccidcvr5", TRCCIDCVR5}, + {"trccidcvr6", TRCCIDCVR6}, + {"trccidcvr7", TRCCIDCVR7}, + {"trcvmidcvr0", TRCVMIDCVR0}, + {"trcvmidcvr1", TRCVMIDCVR1}, + {"trcvmidcvr2", TRCVMIDCVR2}, + {"trcvmidcvr3", TRCVMIDCVR3}, + {"trcvmidcvr4", TRCVMIDCVR4}, + {"trcvmidcvr5", TRCVMIDCVR5}, + {"trcvmidcvr6", TRCVMIDCVR6}, + {"trcvmidcvr7", TRCVMIDCVR7}, + {"trccidcctlr0", TRCCIDCCTLR0}, + {"trccidcctlr1", TRCCIDCCTLR1}, + {"trcvmidcctlr0", TRCVMIDCCTLR0}, + {"trcvmidcctlr1", TRCVMIDCCTLR1}, + {"trcitctrl", TRCITCTRL}, + {"trcclaimset", TRCCLAIMSET}, + {"trcclaimclr", TRCCLAIMCLR}, + + // GICv3 registers + {"icc_bpr1_el1", ICC_BPR1_EL1}, + {"icc_bpr0_el1", ICC_BPR0_EL1}, + 
{"icc_pmr_el1", ICC_PMR_EL1}, + {"icc_ctlr_el1", ICC_CTLR_EL1}, + {"icc_ctlr_el3", ICC_CTLR_EL3}, + {"icc_sre_el1", ICC_SRE_EL1}, + {"icc_sre_el2", ICC_SRE_EL2}, + {"icc_sre_el3", ICC_SRE_EL3}, + {"icc_igrpen0_el1", ICC_IGRPEN0_EL1}, + {"icc_igrpen1_el1", ICC_IGRPEN1_EL1}, + {"icc_igrpen1_el3", ICC_IGRPEN1_EL3}, + {"icc_seien_el1", ICC_SEIEN_EL1}, + {"icc_ap0r0_el1", ICC_AP0R0_EL1}, + {"icc_ap0r1_el1", ICC_AP0R1_EL1}, + {"icc_ap0r2_el1", ICC_AP0R2_EL1}, + {"icc_ap0r3_el1", ICC_AP0R3_EL1}, + {"icc_ap1r0_el1", ICC_AP1R0_EL1}, + {"icc_ap1r1_el1", ICC_AP1R1_EL1}, + {"icc_ap1r2_el1", ICC_AP1R2_EL1}, + {"icc_ap1r3_el1", ICC_AP1R3_EL1}, + {"ich_ap0r0_el2", ICH_AP0R0_EL2}, + {"ich_ap0r1_el2", ICH_AP0R1_EL2}, + {"ich_ap0r2_el2", ICH_AP0R2_EL2}, + {"ich_ap0r3_el2", ICH_AP0R3_EL2}, + {"ich_ap1r0_el2", ICH_AP1R0_EL2}, + {"ich_ap1r1_el2", ICH_AP1R1_EL2}, + {"ich_ap1r2_el2", ICH_AP1R2_EL2}, + {"ich_ap1r3_el2", ICH_AP1R3_EL2}, + {"ich_hcr_el2", ICH_HCR_EL2}, + {"ich_misr_el2", ICH_MISR_EL2}, + {"ich_vmcr_el2", ICH_VMCR_EL2}, + {"ich_vseir_el2", ICH_VSEIR_EL2}, + {"ich_lr0_el2", ICH_LR0_EL2}, + {"ich_lr1_el2", ICH_LR1_EL2}, + {"ich_lr2_el2", ICH_LR2_EL2}, + {"ich_lr3_el2", ICH_LR3_EL2}, + {"ich_lr4_el2", ICH_LR4_EL2}, + {"ich_lr5_el2", ICH_LR5_EL2}, + {"ich_lr6_el2", ICH_LR6_EL2}, + {"ich_lr7_el2", ICH_LR7_EL2}, + {"ich_lr8_el2", ICH_LR8_EL2}, + {"ich_lr9_el2", ICH_LR9_EL2}, + {"ich_lr10_el2", ICH_LR10_EL2}, + {"ich_lr11_el2", ICH_LR11_EL2}, + {"ich_lr12_el2", ICH_LR12_EL2}, + {"ich_lr13_el2", ICH_LR13_EL2}, + {"ich_lr14_el2", ICH_LR14_EL2}, + {"ich_lr15_el2", ICH_LR15_EL2} +}; + +uint32_t +A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { + // First search the registers shared by all + std::string NameLower = Name.lower(); + for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { + if (SysRegPairs[i].Name == NameLower) { + Valid = true; + return SysRegPairs[i].Value; + } + } + + // Now try the instruction-specific registers (either read-only or + // write-only). + for (unsigned i = 0; i < NumInstPairs; ++i) { + if (InstPairs[i].Name == NameLower) { + Valid = true; + return InstPairs[i].Value; + } + } + + // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits + // are: 11 xxx 1x11 xxxx xxx + Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$"); + + SmallVector<StringRef, 4> Ops; + if (!GenericRegPattern.match(NameLower, &Ops)) { + Valid = false; + return -1; + } + + uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0; + uint32_t Bits; + Ops[1].getAsInteger(10, Op1); + Ops[2].getAsInteger(10, CRn); + Ops[3].getAsInteger(10, CRm); + Ops[4].getAsInteger(10, Op2); + Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2; + + Valid = true; + return Bits; +} + +std::string +A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { + for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { + if (SysRegPairs[i].Value == Bits) { + Valid = true; + return SysRegPairs[i].Name; + } + } + + for (unsigned i = 0; i < NumInstPairs; ++i) { + if (InstPairs[i].Value == Bits) { + Valid = true; + return InstPairs[i].Name; + } + } + + uint32_t Op0 = (Bits >> 14) & 0x3; + uint32_t Op1 = (Bits >> 11) & 0x7; + uint32_t CRn = (Bits >> 7) & 0xf; + uint32_t CRm = (Bits >> 3) & 0xf; + uint32_t Op2 = Bits & 0x7; + + // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic + // name. 
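+  // For example, Bits == 0xd5b5 decodes to Op0=3, Op1=2, CRn=11, CRm=6, Op2=5
+  // and so prints as "s3_2_c11_c6_5".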
+ if (Op0 != 3 || (CRn != 11 && CRn != 15)) { + Valid = false; + return ""; + } + + assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg"); + + Valid = true; + return "s3_" + utostr(Op1) + "_c" + utostr(CRn) + + "_c" + utostr(CRm) + "_" + utostr(Op2); +} + +const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = { + {"ipas2e1is", IPAS2E1IS}, + {"ipas2le1is", IPAS2LE1IS}, + {"vmalle1is", VMALLE1IS}, + {"alle2is", ALLE2IS}, + {"alle3is", ALLE3IS}, + {"vae1is", VAE1IS}, + {"vae2is", VAE2IS}, + {"vae3is", VAE3IS}, + {"aside1is", ASIDE1IS}, + {"vaae1is", VAAE1IS}, + {"alle1is", ALLE1IS}, + {"vale1is", VALE1IS}, + {"vale2is", VALE2IS}, + {"vale3is", VALE3IS}, + {"vmalls12e1is", VMALLS12E1IS}, + {"vaale1is", VAALE1IS}, + {"ipas2e1", IPAS2E1}, + {"ipas2le1", IPAS2LE1}, + {"vmalle1", VMALLE1}, + {"alle2", ALLE2}, + {"alle3", ALLE3}, + {"vae1", VAE1}, + {"vae2", VAE2}, + {"vae3", VAE3}, + {"aside1", ASIDE1}, + {"vaae1", VAAE1}, + {"alle1", ALLE1}, + {"vale1", VALE1}, + {"vale2", VALE2}, + {"vale3", VALE3}, + {"vmalls12e1", VMALLS12E1}, + {"vaale1", VAALE1} +}; + +A64TLBI::TLBIMapper::TLBIMapper() + : NamedImmMapper(TLBIPairs, 0) {} + +bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) { + const fltSemantics &Sem = Val.getSemantics(); + unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1; + + uint32_t ExpMask; + switch (FracBits) { + case 10: // IEEE half-precision + ExpMask = 0x1f; + break; + case 23: // IEEE single-precision + ExpMask = 0xff; + break; + case 52: // IEEE double-precision + ExpMask = 0x7ff; + break; + case 112: // IEEE quad-precision + // No immediates are valid for double precision. + return false; + default: + llvm_unreachable("Only half, single and double precision supported"); + } + + uint32_t ExpStart = FracBits; + uint64_t FracMask = (1ULL << FracBits) - 1; + + uint32_t Sign = Val.isNegative(); + + uint64_t Bits= Val.bitcastToAPInt().getLimitedValue(); + uint64_t Fraction = Bits & FracMask; + int32_t Exponent = ((Bits >> ExpStart) & ExpMask); + Exponent -= ExpMask >> 1; + + // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19) + // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48) + // This translates to: only 4 bits of fraction; -3 <= exp <= 4. + uint64_t A64FracStart = FracBits - 4; + uint64_t A64FracMask = 0xf; + + // Are there too many fraction bits? + if (Fraction & ~(A64FracMask << A64FracStart)) + return false; + + if (Exponent < -3 || Exponent > 4) + return false; + + uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask; + uint32_t PackedExp = (Exponent + 7) & 0x7; + + Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction; + return true; +} + +// Encoding of the immediate for logical (immediate) instructions: +// +// | N | imms | immr | size | R | S | +// |---+--------+--------+------+--------------+--------------| +// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) | +// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) | +// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) | +// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) | +// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) | +// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) | +// | 0 | 11111x | - | | UNALLOCATED | | +// +// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in +// which the lower S+1 bits are ones and the remaining bits are zero, then +// rotated right by R bits, which is then replicated across the datapath. 
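+//
+// For example, an element size of 16 with S=7 (imms=0b100111) and R=0 selects
+// the 16-bit pattern 0x00ff, which is replicated to 0x00ff00ff00ff00ff on a
+// 64-bit datapath.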
+// +// + Values of 'N', 'imms' and 'immr' which do not match the above table are +// RESERVED. +// + If all 's' bits in the imms field are set then the instruction is +// RESERVED. +// + The 'x' bits in the 'immr' field are IGNORED. + +bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { + int RepeatWidth; + int Rotation = 0; + int Num1s = 0; + + // Because there are S+1 ones in the replicated mask, an immediate of all + // zeros is not allowed. Filtering it here is probably more efficient. + if (Imm == 0) return false; + + for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) { + uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1; + uint64_t ReplicatedMask = Imm & RepeatMask; + + if (ReplicatedMask == 0) continue; + + // First we have to make sure the mask is actually repeated in each slot for + // this width-specifier. + bool IsReplicatedMask = true; + for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) { + if (((Imm >> i) & RepeatMask) != ReplicatedMask) { + IsReplicatedMask = false; + break; + } + } + if (!IsReplicatedMask) continue; + + // Now we have to work out the amount of rotation needed. The first part of + // this calculation is actually independent of RepeatWidth, but the complex + // case will depend on it. + Rotation = CountTrailingZeros_64(Imm); + if (Rotation == 0) { + // There were no leading zeros, which means it's either in place or there + // are 1s at each end (e.g. 0x8003 needs rotating). + Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm) + : CountLeadingOnes_32(Imm); + Rotation = RepeatWidth - Rotation; + } + + uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation) + | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + // Of course, they may not actually be ones, so we have to check that: + if (!isMask_64(ReplicatedOnes)) + continue; + + Num1s = CountTrailingOnes_64(ReplicatedOnes); + + // We know we've got an almost valid encoding (certainly, if this is invalid + // no other parameters would work). + break; + } + + // The encodings which would produce all 1s are RESERVED. + if (RepeatWidth == 1 || Num1s == RepeatWidth) return false; + + uint32_t N = RepeatWidth == 64; + uint32_t ImmR = RepeatWidth - Rotation; + uint32_t ImmS = Num1s - 1; + + switch (RepeatWidth) { + default: break; // No action required for other valid rotations. + case 16: ImmS |= 0x20; break; // 10ssss + case 8: ImmS |= 0x30; break; // 110sss + case 4: ImmS |= 0x38; break; // 1110ss + case 2: ImmS |= 0x3c; break; // 11110s + } + + Bits = ImmS | (ImmR << 6) | (N << 12); + + return true; +} + + +bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, + uint64_t &Imm) { + uint32_t N = Bits >> 12; + uint32_t ImmR = (Bits >> 6) & 0x3f; + uint32_t ImmS = Bits & 0x3f; + + // N=1 encodes a 64-bit replication and is invalid for the 32-bit + // instructions. + if (RegWidth == 32 && N != 0) return false; + + int Width = 0; + if (N == 1) + Width = 64; + else if ((ImmS & 0x20) == 0) + Width = 32; + else if ((ImmS & 0x10) == 0) + Width = 16; + else if ((ImmS & 0x08) == 0) + Width = 8; + else if ((ImmS & 0x04) == 0) + Width = 4; + else if ((ImmS & 0x02) == 0) + Width = 2; + else { + // ImmS is 0b11111x: UNALLOCATED + return false; + } + + int Num1s = (ImmS & (Width - 1)) + 1; + + // All encodings which would map to -1 (signed) are RESERVED. + if (Num1s == Width) return false; + + int Rotation = (ImmR & (Width - 1)); + uint64_t Mask = (1ULL << Num1s) - 1; + uint64_t WidthMask = Width == 64 ? 
-1 : (1ULL << Width) - 1; + Mask = (Mask >> Rotation) + | ((Mask << (Width - Rotation)) & WidthMask); + + Imm = 0; + for (unsigned i = 0; i < RegWidth / Width; ++i) { + Imm |= Mask; + Mask <<= Width; + } + + return true; +} + +bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { + // If high bits are set then a 32-bit MOVZ can't possibly work. + if (RegWidth == 32 && (Value & ~0xffffffffULL)) + return false; + + for (int i = 0; i < RegWidth; i += 16) { + // If the value is 0 when we mask out all the bits that could be set with + // the current LSL value then it's representable. + if ((Value & ~(0xffffULL << i)) == 0) { + Shift = i / 16; + UImm16 = (Value >> i) & 0xffff; + return true; + } + } + return false; +} + +bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { + // MOVN is defined to set its register to NOT(LSL(imm16, shift)). + + // We have to be a little careful about a 32-bit register: 0xffff_1234 *is* + // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not + // a valid input for isMOVZImm. + if (RegWidth == 32 && (Value & ~0xffffffffULL)) + return false; + + uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value; + + return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift); +} + +bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value, + int &UImm16, int &Shift) { + if (isMOVZImm(RegWidth, Value, UImm16, Shift)) + return false; + + return isMOVNImm(RegWidth, Value, UImm16, Shift); +} diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h new file mode 100644 index 0000000..1b773d6 --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -0,0 +1,1068 @@ +//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the AArch64 target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_BASEINFO_H +#define LLVM_AARCH64_BASEINFO_H + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +// // Enums corresponding to AArch64 condition codes +namespace A64CC { + // The CondCodes constants map directly to the 4-bit encoding of the + // condition field for predicated instructions. 
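+  // For example, EQ is encoded as 0b0000 and NV as 0b1111.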
+  enum CondCodes {   // Meaning (integer)          Meaning (floating-point)
+    EQ = 0,        // Equal                      Equal
+    NE,            // Not equal                  Not equal, or unordered
+    HS,            // Unsigned higher or same    >, ==, or unordered
+    LO,            // Unsigned lower             Less than
+    MI,            // Minus, negative            Less than
+    PL,            // Plus, positive or zero     >, ==, or unordered
+    VS,            // Overflow                   Unordered
+    VC,            // No overflow                Ordered
+    HI,            // Unsigned higher            Greater than, or unordered
+    LS,            // Unsigned lower or same     Less than or equal
+    GE,            // Greater than or equal      Greater than or equal
+    LT,            // Less than                  Less than, or unordered
+    GT,            // Signed greater than        Greater than
+    LE,            // Signed less than or equal  <, ==, or unordered
+    AL,            // Always (unconditional)     Always (unconditional)
+    NV,            // Always (unconditional)     Always (unconditional)
+    // Note that NV exists purely to disassemble 0b1111. Execution is
+    // "always".
+    Invalid
+  };
+
+} // namespace A64CC
+
+inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
+  switch (CC) {
+  default: llvm_unreachable("Unknown condition code");
+  case A64CC::EQ:  return "eq";
+  case A64CC::NE:  return "ne";
+  case A64CC::HS:  return "hs";
+  case A64CC::LO:  return "lo";
+  case A64CC::MI:  return "mi";
+  case A64CC::PL:  return "pl";
+  case A64CC::VS:  return "vs";
+  case A64CC::VC:  return "vc";
+  case A64CC::HI:  return "hi";
+  case A64CC::LS:  return "ls";
+  case A64CC::GE:  return "ge";
+  case A64CC::LT:  return "lt";
+  case A64CC::GT:  return "gt";
+  case A64CC::LE:  return "le";
+  case A64CC::AL:  return "al";
+  case A64CC::NV:  return "nv";
+  }
+}
+
+inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
+  return StringSwitch<A64CC::CondCodes>(CondStr.lower())
+             .Case("eq", A64CC::EQ)
+             .Case("ne", A64CC::NE)
+             .Case("hs", A64CC::HS)
+             .Case("cs", A64CC::HS)
+             .Case("lo", A64CC::LO)
+             .Case("cc", A64CC::LO)
+             .Case("mi", A64CC::MI)
+             .Case("pl", A64CC::PL)
+             .Case("vs", A64CC::VS)
+             .Case("vc", A64CC::VC)
+             .Case("hi", A64CC::HI)
+             .Case("ls", A64CC::LS)
+             .Case("ge", A64CC::GE)
+             .Case("lt", A64CC::LT)
+             .Case("gt", A64CC::GT)
+             .Case("le", A64CC::LE)
+             .Case("al", A64CC::AL)
+             .Case("nv", A64CC::NV)
+             .Default(A64CC::Invalid);
+}
+
+inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
+  // It turns out that the condition codes have been designed so that in order
+  // to reverse the intent of the condition you only have to invert the low
+  // bit:
+  return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
+}
+
+/// Instances of this class can perform bidirectional mapping from arbitrary
+/// identifier strings to operand encodings. For example "MSR" takes a named
+/// system-register which must be encoded somehow and decoded for printing. This
+/// central location means that the information for those transformations is not
+/// duplicated and remains in sync.
+///
+/// FIXME: currently the algorithm is a completely unoptimised linear
+/// search. Obviously this could be improved, but we would probably want to work
+/// out just how often these instructions are emitted before working on it. It
+/// might even be optimal to just reorder the tables for the common instructions
+/// rather than changing the algorithm.
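+///
+/// As a rough usage sketch (illustrative only), the A64TLBI::TLBIMapper
+/// defined below maps TLBI operation names to their encodings and back:
+///
+///   bool Valid;
+///   A64TLBI::TLBIMapper Mapper;
+///   uint32_t Enc = Mapper.fromString("vae1is", Valid); // Enc == 0x4419
+///   StringRef Name = Mapper.toString(Enc, Valid);      // Name == "vae1is"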
+struct NamedImmMapper { + struct Mapping { + const char *Name; + uint32_t Value; + }; + + template<int N> + NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) + : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} + + StringRef toString(uint32_t Value, bool &Valid) const; + uint32_t fromString(StringRef Name, bool &Valid) const; + + /// Many of the instructions allow an alternative assembly form consisting of + /// a simple immediate. Currently the only valid forms are ranges [0, N) where + /// N being 0 indicates no immediate syntax-form is allowed. + bool validImm(uint32_t Value) const; +protected: + const Mapping *Pairs; + size_t NumPairs; + uint32_t TooBigImm; +}; + +namespace A64AT { + enum ATValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + S1E1R = 0x43c0, // 01 000 0111 1000 000 + S1E2R = 0x63c0, // 01 100 0111 1000 000 + S1E3R = 0x73c0, // 01 110 0111 1000 000 + S1E1W = 0x43c1, // 01 000 0111 1000 001 + S1E2W = 0x63c1, // 01 100 0111 1000 001 + S1E3W = 0x73c1, // 01 110 0111 1000 001 + S1E0R = 0x43c2, // 01 000 0111 1000 010 + S1E0W = 0x43c3, // 01 000 0111 1000 011 + S12E1R = 0x63c4, // 01 100 0111 1000 100 + S12E1W = 0x63c5, // 01 100 0111 1000 101 + S12E0R = 0x63c6, // 01 100 0111 1000 110 + S12E0W = 0x63c7 // 01 100 0111 1000 111 + }; + + struct ATMapper : NamedImmMapper { + const static Mapping ATPairs[]; + + ATMapper(); + }; + +} +namespace A64DB { + enum DBValues { + Invalid = -1, + OSHLD = 0x1, + OSHST = 0x2, + OSH = 0x3, + NSHLD = 0x5, + NSHST = 0x6, + NSH = 0x7, + ISHLD = 0x9, + ISHST = 0xa, + ISH = 0xb, + LD = 0xd, + ST = 0xe, + SY = 0xf + }; + + struct DBarrierMapper : NamedImmMapper { + const static Mapping DBarrierPairs[]; + + DBarrierMapper(); + }; +} + +namespace A64DC { + enum DCValues { + Invalid = -1, // Op1 CRn CRm Op2 + ZVA = 0x5ba1, // 01 011 0111 0100 001 + IVAC = 0x43b1, // 01 000 0111 0110 001 + ISW = 0x43b2, // 01 000 0111 0110 010 + CVAC = 0x5bd1, // 01 011 0111 1010 001 + CSW = 0x43d2, // 01 000 0111 1010 010 + CVAU = 0x5bd9, // 01 011 0111 1011 001 + CIVAC = 0x5bf1, // 01 011 0111 1110 001 + CISW = 0x43f2 // 01 000 0111 1110 010 + }; + + struct DCMapper : NamedImmMapper { + const static Mapping DCPairs[]; + + DCMapper(); + }; + +} + +namespace A64IC { + enum ICValues { + Invalid = -1, // Op1 CRn CRm Op2 + IALLUIS = 0x0388, // 000 0111 0001 000 + IALLU = 0x03a8, // 000 0111 0101 000 + IVAU = 0x1ba9 // 011 0111 0101 001 + }; + + + struct ICMapper : NamedImmMapper { + const static Mapping ICPairs[]; + + ICMapper(); + }; + + static inline bool NeedsRegister(ICValues Val) { + return Val == IVAU; + } +} + +namespace A64ISB { + enum ISBValues { + Invalid = -1, + SY = 0xf + }; + struct ISBMapper : NamedImmMapper { + const static Mapping ISBPairs[]; + + ISBMapper(); + }; +} + +namespace A64PRFM { + enum PRFMValues { + Invalid = -1, + PLDL1KEEP = 0x00, + PLDL1STRM = 0x01, + PLDL2KEEP = 0x02, + PLDL2STRM = 0x03, + PLDL3KEEP = 0x04, + PLDL3STRM = 0x05, + PLIL1KEEP = 0x08, + PLIL1STRM = 0x09, + PLIL2KEEP = 0x0a, + PLIL2STRM = 0x0b, + PLIL3KEEP = 0x0c, + PLIL3STRM = 0x0d, + PSTL1KEEP = 0x10, + PSTL1STRM = 0x11, + PSTL2KEEP = 0x12, + PSTL2STRM = 0x13, + PSTL3KEEP = 0x14, + PSTL3STRM = 0x15 + }; + + struct PRFMMapper : NamedImmMapper { + const static Mapping PRFMPairs[]; + + PRFMMapper(); + }; +} + +namespace A64PState { + enum PStateValues { + Invalid = -1, + SPSel = 0x05, + DAIFSet = 0x1e, + DAIFClr = 0x1f + }; + + struct PStateMapper : NamedImmMapper { + const static Mapping PStatePairs[]; + + PStateMapper(); + }; + +} + +namespace A64SE { + enum 
ShiftExtSpecifiers { + Invalid = -1, + LSL, + LSR, + ASR, + ROR, + + UXTB, + UXTH, + UXTW, + UXTX, + + SXTB, + SXTH, + SXTW, + SXTX + }; +} + +namespace A64SysReg { + enum SysRegROValues { + MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 + DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 + MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000 + OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100 + DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110 + PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110 + PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111 + MIDR_EL1 = 0xc000, // 11 000 0000 0000 000 + CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000 + CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001 + CTR_EL0 = 0xd801, // 11 011 0000 0000 001 + MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101 + REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110 + AIDR_EL1 = 0xc807, // 11 001 0000 0000 111 + DCZID_EL0 = 0xd807, // 11 011 0000 0000 111 + ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000 + ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001 + ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010 + ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011 + ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100 + ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101 + ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110 + ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111 + ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000 + ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001 + ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010 + ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 + ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 + ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 + ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 + ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 + ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 + ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 + ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 + ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 + ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 + ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 + ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 + ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 + MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 + MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 + MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 + RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001 + RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001 + RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001 + ISR_EL1 = 0xc608, // 11 000 1100 0001 000 + CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 + CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010 + + // Trace registers + TRCSTATR = 0x8818, // 10 001 0000 0011 000 + TRCIDR8 = 0x8806, // 10 001 0000 0000 110 + TRCIDR9 = 0x880e, // 10 001 0000 0001 110 + TRCIDR10 = 0x8816, // 10 001 0000 0010 110 + TRCIDR11 = 0x881e, // 10 001 0000 0011 110 + TRCIDR12 = 0x8826, // 10 001 0000 0100 110 + TRCIDR13 = 0x882e, // 10 001 0000 0101 110 + TRCIDR0 = 0x8847, // 10 001 0000 1000 111 + TRCIDR1 = 0x884f, // 10 001 0000 1001 111 + TRCIDR2 = 0x8857, // 10 001 0000 1010 111 + TRCIDR3 = 0x885f, // 10 001 0000 1011 111 + TRCIDR4 = 0x8867, // 10 001 0000 1100 111 + TRCIDR5 = 0x886f, // 10 001 0000 1101 111 + TRCIDR6 = 0x8877, // 10 001 0000 1110 111 + TRCIDR7 = 0x887f, // 10 001 0000 1111 111 + TRCOSLSR = 0x888c, // 10 001 0001 0001 100 + TRCPDSR = 0x88ac, // 10 001 0001 0101 100 + TRCDEVAFF0 = 0x8bd6, // 10 001 0111 1010 110 + TRCDEVAFF1 = 0x8bde, // 10 001 0111 1011 110 + TRCLSR = 0x8bee, // 10 001 0111 1101 110 + TRCAUTHSTATUS = 0x8bf6, // 10 001 0111 1110 110 + TRCDEVARCH = 0x8bfe, // 10 001 0111 1111 110 + TRCDEVID = 0x8b97, // 10 001 
0111 0010 111 + TRCDEVTYPE = 0x8b9f, // 10 001 0111 0011 111 + TRCPIDR4 = 0x8ba7, // 10 001 0111 0100 111 + TRCPIDR5 = 0x8baf, // 10 001 0111 0101 111 + TRCPIDR6 = 0x8bb7, // 10 001 0111 0110 111 + TRCPIDR7 = 0x8bbf, // 10 001 0111 0111 111 + TRCPIDR0 = 0x8bc7, // 10 001 0111 1000 111 + TRCPIDR1 = 0x8bcf, // 10 001 0111 1001 111 + TRCPIDR2 = 0x8bd7, // 10 001 0111 1010 111 + TRCPIDR3 = 0x8bdf, // 10 001 0111 1011 111 + TRCCIDR0 = 0x8be7, // 10 001 0111 1100 111 + TRCCIDR1 = 0x8bef, // 10 001 0111 1101 111 + TRCCIDR2 = 0x8bf7, // 10 001 0111 1110 111 + TRCCIDR3 = 0x8bff, // 10 001 0111 1111 111 + + // GICv3 registers + ICC_IAR1_EL1 = 0xc660, // 11 000 1100 1100 000 + ICC_IAR0_EL1 = 0xc640, // 11 000 1100 1000 000 + ICC_HPPIR1_EL1 = 0xc662, // 11 000 1100 1100 010 + ICC_HPPIR0_EL1 = 0xc642, // 11 000 1100 1000 010 + ICC_RPR_EL1 = 0xc65b, // 11 000 1100 1011 011 + ICH_VTR_EL2 = 0xe659, // 11 100 1100 1011 001 + ICH_EISR_EL2 = 0xe65b, // 11 100 1100 1011 011 + ICH_ELSR_EL2 = 0xe65d // 11 100 1100 1011 101 + }; + + enum SysRegWOValues { + DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000 + OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100 + PMSWINC_EL0 = 0xdce4, // 11 011 1001 1100 100 + + // Trace Registers + TRCOSLAR = 0x8884, // 10 001 0001 0000 100 + TRCLAR = 0x8be6, // 10 001 0111 1100 110 + + // GICv3 registers + ICC_EOIR1_EL1 = 0xc661, // 11 000 1100 1100 001 + ICC_EOIR0_EL1 = 0xc641, // 11 000 1100 1000 001 + ICC_DIR_EL1 = 0xc659, // 11 000 1100 1011 001 + ICC_SGI1R_EL1 = 0xc65d, // 11 000 1100 1011 101 + ICC_ASGI1R_EL1 = 0xc65e, // 11 000 1100 1011 110 + ICC_SGI0R_EL1 = 0xc65f // 11 000 1100 1011 111 + }; + + enum SysRegValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010 + OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010 + TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000 + MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000 + MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010 + DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000 + OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010 + DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000 + DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100 + DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100 + DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100 + DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100 + DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100 + DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100 + DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100 + DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100 + DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100 + DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100 + DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100 + DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100 + DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100 + DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100 + DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100 + DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100 + DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101 + DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101 + DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101 + DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101 + DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101 + DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101 + DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101 + DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101 + DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101 + DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101 + DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101 + DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101 + DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101 + DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101 + DBGBCR14_EL1 = 0x8075, // 10 
000 0000 1110 101 + DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101 + DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110 + DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110 + DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110 + DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110 + DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110 + DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110 + DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110 + DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110 + DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110 + DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110 + DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110 + DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110 + DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110 + DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110 + DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110 + DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110 + DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111 + DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111 + DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111 + DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111 + DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111 + DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111 + DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111 + DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111 + DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111 + DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111 + DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111 + DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111 + DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111 + DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111 + DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111 + DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111 + TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000 + OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100 + DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100 + DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110 + DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110 + CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000 + VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000 + VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101 + CPACR_EL1 = 0xc082, // 11 000 0001 0000 010 + SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000 + SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000 + SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000 + ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001 + ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001 + ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001 + HCR_EL2 = 0xe088, // 11 100 0001 0001 000 + SCR_EL3 = 0xf088, // 11 110 0001 0001 000 + MDCR_EL2 = 0xe089, // 11 100 0001 0001 001 + SDER32_EL3 = 0xf089, // 11 110 0001 0001 001 + CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010 + CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010 + HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011 + HACR_EL2 = 0xe08f, // 11 100 0001 0001 111 + MDCR_EL3 = 0xf099, // 11 110 0001 0011 001 + TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000 + TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000 + TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000 + TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001 + TCR_EL1 = 0xc102, // 11 000 0010 0000 010 + TCR_EL2 = 0xe102, // 11 100 0010 0000 010 + TCR_EL3 = 0xf102, // 11 110 0010 0000 010 + VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000 + VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010 + DACR32_EL2 = 0xe180, // 11 100 0011 0000 000 + SPSR_EL1 = 0xc200, // 11 000 0100 0000 000 + SPSR_EL2 = 0xe200, // 11 100 0100 0000 000 + SPSR_EL3 = 0xf200, // 11 110 0100 0000 000 + ELR_EL1 = 0xc201, // 11 000 0100 0000 001 + ELR_EL2 = 0xe201, // 11 100 0100 0000 001 + ELR_EL3 = 0xf201, // 11 110 0100 0000 001 + SP_EL0 = 0xc208, // 11 000 0100 0001 000 + SP_EL1 = 0xe208, // 11 100 0100 0001 000 + SP_EL2 = 0xf208, 
// 11 110 0100 0001 000 + SPSel = 0xc210, // 11 000 0100 0010 000 + NZCV = 0xda10, // 11 011 0100 0010 000 + DAIF = 0xda11, // 11 011 0100 0010 001 + CurrentEL = 0xc212, // 11 000 0100 0010 010 + SPSR_irq = 0xe218, // 11 100 0100 0011 000 + SPSR_abt = 0xe219, // 11 100 0100 0011 001 + SPSR_und = 0xe21a, // 11 100 0100 0011 010 + SPSR_fiq = 0xe21b, // 11 100 0100 0011 011 + FPCR = 0xda20, // 11 011 0100 0100 000 + FPSR = 0xda21, // 11 011 0100 0100 001 + DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000 + DLR_EL0 = 0xda29, // 11 011 0100 0101 001 + IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001 + AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000 + AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000 + AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000 + AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001 + AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001 + AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001 + ESR_EL1 = 0xc290, // 11 000 0101 0010 000 + ESR_EL2 = 0xe290, // 11 100 0101 0010 000 + ESR_EL3 = 0xf290, // 11 110 0101 0010 000 + FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000 + FAR_EL1 = 0xc300, // 11 000 0110 0000 000 + FAR_EL2 = 0xe300, // 11 100 0110 0000 000 + FAR_EL3 = 0xf300, // 11 110 0110 0000 000 + HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100 + PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000 + PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000 + PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001 + PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010 + PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011 + PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101 + PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000 + PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001 + PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010 + PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000 + PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001 + PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010 + PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011 + MAIR_EL1 = 0xc510, // 11 000 1010 0010 000 + MAIR_EL2 = 0xe510, // 11 100 1010 0010 000 + MAIR_EL3 = 0xf510, // 11 110 1010 0010 000 + AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000 + AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000 + AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000 + VBAR_EL1 = 0xc600, // 11 000 1100 0000 000 + VBAR_EL2 = 0xe600, // 11 100 1100 0000 000 + VBAR_EL3 = 0xf600, // 11 110 1100 0000 000 + RMR_EL1 = 0xc602, // 11 000 1100 0000 010 + RMR_EL2 = 0xe602, // 11 100 1100 0000 010 + RMR_EL3 = 0xf602, // 11 110 1100 0000 010 + CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001 + TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010 + TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010 + TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010 + TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011 + TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100 + CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000 + CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011 + CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000 + CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000 + CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000 + CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000 + CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000 + CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001 + CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001 + CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001 + CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010 + CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010 + CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010 + CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000 + CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001 + CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010 + PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000 + PMEVCNTR1_EL0 = 
0xdf41, // 11 011 1110 1000 001 + PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010 + PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011 + PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100 + PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101 + PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110 + PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111 + PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000 + PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001 + PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010 + PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011 + PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100 + PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101 + PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110 + PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111 + PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000 + PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001 + PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010 + PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011 + PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100 + PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101 + PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110 + PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111 + PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000 + PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001 + PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010 + PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011 + PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100 + PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101 + PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110 + PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111 + PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000 + PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001 + PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010 + PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011 + PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100 + PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101 + PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110 + PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111 + PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000 + PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001 + PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010 + PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011 + PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100 + PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101 + PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110 + PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111 + PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000 + PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001 + PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010 + PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011 + PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100 + PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101 + PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110 + PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111 + PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000 + PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001 + PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010 + PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011 + PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100 + PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101 + PMEVTYPER30_EL0 = 0xdf7e, // 11 011 1110 1111 110 + + // Trace registers + TRCPRGCTLR = 0x8808, // 10 001 0000 0001 000 + TRCPROCSELR = 0x8810, // 10 001 0000 0010 000 + TRCCONFIGR = 0x8820, // 10 001 0000 0100 000 + TRCAUXCTLR = 0x8830, // 10 001 0000 0110 000 + TRCEVENTCTL0R = 0x8840, // 10 001 0000 1000 000 + TRCEVENTCTL1R = 0x8848, // 10 001 0000 1001 000 + TRCSTALLCTLR = 0x8858, // 10 001 0000 1011 000 + TRCTSCTLR = 0x8860, // 10 001 0000 
1100 000 + TRCSYNCPR = 0x8868, // 10 001 0000 1101 000 + TRCCCCTLR = 0x8870, // 10 001 0000 1110 000 + TRCBBCTLR = 0x8878, // 10 001 0000 1111 000 + TRCTRACEIDR = 0x8801, // 10 001 0000 0000 001 + TRCQCTLR = 0x8809, // 10 001 0000 0001 001 + TRCVICTLR = 0x8802, // 10 001 0000 0000 010 + TRCVIIECTLR = 0x880a, // 10 001 0000 0001 010 + TRCVISSCTLR = 0x8812, // 10 001 0000 0010 010 + TRCVIPCSSCTLR = 0x881a, // 10 001 0000 0011 010 + TRCVDCTLR = 0x8842, // 10 001 0000 1000 010 + TRCVDSACCTLR = 0x884a, // 10 001 0000 1001 010 + TRCVDARCCTLR = 0x8852, // 10 001 0000 1010 010 + TRCSEQEVR0 = 0x8804, // 10 001 0000 0000 100 + TRCSEQEVR1 = 0x880c, // 10 001 0000 0001 100 + TRCSEQEVR2 = 0x8814, // 10 001 0000 0010 100 + TRCSEQRSTEVR = 0x8834, // 10 001 0000 0110 100 + TRCSEQSTR = 0x883c, // 10 001 0000 0111 100 + TRCEXTINSELR = 0x8844, // 10 001 0000 1000 100 + TRCCNTRLDVR0 = 0x8805, // 10 001 0000 0000 101 + TRCCNTRLDVR1 = 0x880d, // 10 001 0000 0001 101 + TRCCNTRLDVR2 = 0x8815, // 10 001 0000 0010 101 + TRCCNTRLDVR3 = 0x881d, // 10 001 0000 0011 101 + TRCCNTCTLR0 = 0x8825, // 10 001 0000 0100 101 + TRCCNTCTLR1 = 0x882d, // 10 001 0000 0101 101 + TRCCNTCTLR2 = 0x8835, // 10 001 0000 0110 101 + TRCCNTCTLR3 = 0x883d, // 10 001 0000 0111 101 + TRCCNTVR0 = 0x8845, // 10 001 0000 1000 101 + TRCCNTVR1 = 0x884d, // 10 001 0000 1001 101 + TRCCNTVR2 = 0x8855, // 10 001 0000 1010 101 + TRCCNTVR3 = 0x885d, // 10 001 0000 1011 101 + TRCIMSPEC0 = 0x8807, // 10 001 0000 0000 111 + TRCIMSPEC1 = 0x880f, // 10 001 0000 0001 111 + TRCIMSPEC2 = 0x8817, // 10 001 0000 0010 111 + TRCIMSPEC3 = 0x881f, // 10 001 0000 0011 111 + TRCIMSPEC4 = 0x8827, // 10 001 0000 0100 111 + TRCIMSPEC5 = 0x882f, // 10 001 0000 0101 111 + TRCIMSPEC6 = 0x8837, // 10 001 0000 0110 111 + TRCIMSPEC7 = 0x883f, // 10 001 0000 0111 111 + TRCRSCTLR2 = 0x8890, // 10 001 0001 0010 000 + TRCRSCTLR3 = 0x8898, // 10 001 0001 0011 000 + TRCRSCTLR4 = 0x88a0, // 10 001 0001 0100 000 + TRCRSCTLR5 = 0x88a8, // 10 001 0001 0101 000 + TRCRSCTLR6 = 0x88b0, // 10 001 0001 0110 000 + TRCRSCTLR7 = 0x88b8, // 10 001 0001 0111 000 + TRCRSCTLR8 = 0x88c0, // 10 001 0001 1000 000 + TRCRSCTLR9 = 0x88c8, // 10 001 0001 1001 000 + TRCRSCTLR10 = 0x88d0, // 10 001 0001 1010 000 + TRCRSCTLR11 = 0x88d8, // 10 001 0001 1011 000 + TRCRSCTLR12 = 0x88e0, // 10 001 0001 1100 000 + TRCRSCTLR13 = 0x88e8, // 10 001 0001 1101 000 + TRCRSCTLR14 = 0x88f0, // 10 001 0001 1110 000 + TRCRSCTLR15 = 0x88f8, // 10 001 0001 1111 000 + TRCRSCTLR16 = 0x8881, // 10 001 0001 0000 001 + TRCRSCTLR17 = 0x8889, // 10 001 0001 0001 001 + TRCRSCTLR18 = 0x8891, // 10 001 0001 0010 001 + TRCRSCTLR19 = 0x8899, // 10 001 0001 0011 001 + TRCRSCTLR20 = 0x88a1, // 10 001 0001 0100 001 + TRCRSCTLR21 = 0x88a9, // 10 001 0001 0101 001 + TRCRSCTLR22 = 0x88b1, // 10 001 0001 0110 001 + TRCRSCTLR23 = 0x88b9, // 10 001 0001 0111 001 + TRCRSCTLR24 = 0x88c1, // 10 001 0001 1000 001 + TRCRSCTLR25 = 0x88c9, // 10 001 0001 1001 001 + TRCRSCTLR26 = 0x88d1, // 10 001 0001 1010 001 + TRCRSCTLR27 = 0x88d9, // 10 001 0001 1011 001 + TRCRSCTLR28 = 0x88e1, // 10 001 0001 1100 001 + TRCRSCTLR29 = 0x88e9, // 10 001 0001 1101 001 + TRCRSCTLR30 = 0x88f1, // 10 001 0001 1110 001 + TRCRSCTLR31 = 0x88f9, // 10 001 0001 1111 001 + TRCSSCCR0 = 0x8882, // 10 001 0001 0000 010 + TRCSSCCR1 = 0x888a, // 10 001 0001 0001 010 + TRCSSCCR2 = 0x8892, // 10 001 0001 0010 010 + TRCSSCCR3 = 0x889a, // 10 001 0001 0011 010 + TRCSSCCR4 = 0x88a2, // 10 001 0001 0100 010 + TRCSSCCR5 = 0x88aa, // 10 001 0001 0101 010 + TRCSSCCR6 = 0x88b2, // 10 001 0001 
0110 010 + TRCSSCCR7 = 0x88ba, // 10 001 0001 0111 010 + TRCSSCSR0 = 0x88c2, // 10 001 0001 1000 010 + TRCSSCSR1 = 0x88ca, // 10 001 0001 1001 010 + TRCSSCSR2 = 0x88d2, // 10 001 0001 1010 010 + TRCSSCSR3 = 0x88da, // 10 001 0001 1011 010 + TRCSSCSR4 = 0x88e2, // 10 001 0001 1100 010 + TRCSSCSR5 = 0x88ea, // 10 001 0001 1101 010 + TRCSSCSR6 = 0x88f2, // 10 001 0001 1110 010 + TRCSSCSR7 = 0x88fa, // 10 001 0001 1111 010 + TRCSSPCICR0 = 0x8883, // 10 001 0001 0000 011 + TRCSSPCICR1 = 0x888b, // 10 001 0001 0001 011 + TRCSSPCICR2 = 0x8893, // 10 001 0001 0010 011 + TRCSSPCICR3 = 0x889b, // 10 001 0001 0011 011 + TRCSSPCICR4 = 0x88a3, // 10 001 0001 0100 011 + TRCSSPCICR5 = 0x88ab, // 10 001 0001 0101 011 + TRCSSPCICR6 = 0x88b3, // 10 001 0001 0110 011 + TRCSSPCICR7 = 0x88bb, // 10 001 0001 0111 011 + TRCPDCR = 0x88a4, // 10 001 0001 0100 100 + TRCACVR0 = 0x8900, // 10 001 0010 0000 000 + TRCACVR1 = 0x8910, // 10 001 0010 0010 000 + TRCACVR2 = 0x8920, // 10 001 0010 0100 000 + TRCACVR3 = 0x8930, // 10 001 0010 0110 000 + TRCACVR4 = 0x8940, // 10 001 0010 1000 000 + TRCACVR5 = 0x8950, // 10 001 0010 1010 000 + TRCACVR6 = 0x8960, // 10 001 0010 1100 000 + TRCACVR7 = 0x8970, // 10 001 0010 1110 000 + TRCACVR8 = 0x8901, // 10 001 0010 0000 001 + TRCACVR9 = 0x8911, // 10 001 0010 0010 001 + TRCACVR10 = 0x8921, // 10 001 0010 0100 001 + TRCACVR11 = 0x8931, // 10 001 0010 0110 001 + TRCACVR12 = 0x8941, // 10 001 0010 1000 001 + TRCACVR13 = 0x8951, // 10 001 0010 1010 001 + TRCACVR14 = 0x8961, // 10 001 0010 1100 001 + TRCACVR15 = 0x8971, // 10 001 0010 1110 001 + TRCACATR0 = 0x8902, // 10 001 0010 0000 010 + TRCACATR1 = 0x8912, // 10 001 0010 0010 010 + TRCACATR2 = 0x8922, // 10 001 0010 0100 010 + TRCACATR3 = 0x8932, // 10 001 0010 0110 010 + TRCACATR4 = 0x8942, // 10 001 0010 1000 010 + TRCACATR5 = 0x8952, // 10 001 0010 1010 010 + TRCACATR6 = 0x8962, // 10 001 0010 1100 010 + TRCACATR7 = 0x8972, // 10 001 0010 1110 010 + TRCACATR8 = 0x8903, // 10 001 0010 0000 011 + TRCACATR9 = 0x8913, // 10 001 0010 0010 011 + TRCACATR10 = 0x8923, // 10 001 0010 0100 011 + TRCACATR11 = 0x8933, // 10 001 0010 0110 011 + TRCACATR12 = 0x8943, // 10 001 0010 1000 011 + TRCACATR13 = 0x8953, // 10 001 0010 1010 011 + TRCACATR14 = 0x8963, // 10 001 0010 1100 011 + TRCACATR15 = 0x8973, // 10 001 0010 1110 011 + TRCDVCVR0 = 0x8904, // 10 001 0010 0000 100 + TRCDVCVR1 = 0x8924, // 10 001 0010 0100 100 + TRCDVCVR2 = 0x8944, // 10 001 0010 1000 100 + TRCDVCVR3 = 0x8964, // 10 001 0010 1100 100 + TRCDVCVR4 = 0x8905, // 10 001 0010 0000 101 + TRCDVCVR5 = 0x8925, // 10 001 0010 0100 101 + TRCDVCVR6 = 0x8945, // 10 001 0010 1000 101 + TRCDVCVR7 = 0x8965, // 10 001 0010 1100 101 + TRCDVCMR0 = 0x8906, // 10 001 0010 0000 110 + TRCDVCMR1 = 0x8926, // 10 001 0010 0100 110 + TRCDVCMR2 = 0x8946, // 10 001 0010 1000 110 + TRCDVCMR3 = 0x8966, // 10 001 0010 1100 110 + TRCDVCMR4 = 0x8907, // 10 001 0010 0000 111 + TRCDVCMR5 = 0x8927, // 10 001 0010 0100 111 + TRCDVCMR6 = 0x8947, // 10 001 0010 1000 111 + TRCDVCMR7 = 0x8967, // 10 001 0010 1100 111 + TRCCIDCVR0 = 0x8980, // 10 001 0011 0000 000 + TRCCIDCVR1 = 0x8990, // 10 001 0011 0010 000 + TRCCIDCVR2 = 0x89a0, // 10 001 0011 0100 000 + TRCCIDCVR3 = 0x89b0, // 10 001 0011 0110 000 + TRCCIDCVR4 = 0x89c0, // 10 001 0011 1000 000 + TRCCIDCVR5 = 0x89d0, // 10 001 0011 1010 000 + TRCCIDCVR6 = 0x89e0, // 10 001 0011 1100 000 + TRCCIDCVR7 = 0x89f0, // 10 001 0011 1110 000 + TRCVMIDCVR0 = 0x8981, // 10 001 0011 0000 001 + TRCVMIDCVR1 = 0x8991, // 10 001 0011 0010 001 + TRCVMIDCVR2 = 0x89a1, // 
10 001 0011 0100 001 + TRCVMIDCVR3 = 0x89b1, // 10 001 0011 0110 001 + TRCVMIDCVR4 = 0x89c1, // 10 001 0011 1000 001 + TRCVMIDCVR5 = 0x89d1, // 10 001 0011 1010 001 + TRCVMIDCVR6 = 0x89e1, // 10 001 0011 1100 001 + TRCVMIDCVR7 = 0x89f1, // 10 001 0011 1110 001 + TRCCIDCCTLR0 = 0x8982, // 10 001 0011 0000 010 + TRCCIDCCTLR1 = 0x898a, // 10 001 0011 0001 010 + TRCVMIDCCTLR0 = 0x8992, // 10 001 0011 0010 010 + TRCVMIDCCTLR1 = 0x899a, // 10 001 0011 0011 010 + TRCITCTRL = 0x8b84, // 10 001 0111 0000 100 + TRCCLAIMSET = 0x8bc6, // 10 001 0111 1000 110 + TRCCLAIMCLR = 0x8bce, // 10 001 0111 1001 110 + + // GICv3 registers + ICC_BPR1_EL1 = 0xc663, // 11 000 1100 1100 011 + ICC_BPR0_EL1 = 0xc643, // 11 000 1100 1000 011 + ICC_PMR_EL1 = 0xc230, // 11 000 0100 0110 000 + ICC_CTLR_EL1 = 0xc664, // 11 000 1100 1100 100 + ICC_CTLR_EL3 = 0xf664, // 11 110 1100 1100 100 + ICC_SRE_EL1 = 0xc665, // 11 000 1100 1100 101 + ICC_SRE_EL2 = 0xe64d, // 11 100 1100 1001 101 + ICC_SRE_EL3 = 0xf665, // 11 110 1100 1100 101 + ICC_IGRPEN0_EL1 = 0xc666, // 11 000 1100 1100 110 + ICC_IGRPEN1_EL1 = 0xc667, // 11 000 1100 1100 111 + ICC_IGRPEN1_EL3 = 0xf667, // 11 110 1100 1100 111 + ICC_SEIEN_EL1 = 0xc668, // 11 000 1100 1101 000 + ICC_AP0R0_EL1 = 0xc644, // 11 000 1100 1000 100 + ICC_AP0R1_EL1 = 0xc645, // 11 000 1100 1000 101 + ICC_AP0R2_EL1 = 0xc646, // 11 000 1100 1000 110 + ICC_AP0R3_EL1 = 0xc647, // 11 000 1100 1000 111 + ICC_AP1R0_EL1 = 0xc648, // 11 000 1100 1001 000 + ICC_AP1R1_EL1 = 0xc649, // 11 000 1100 1001 001 + ICC_AP1R2_EL1 = 0xc64a, // 11 000 1100 1001 010 + ICC_AP1R3_EL1 = 0xc64b, // 11 000 1100 1001 011 + ICH_AP0R0_EL2 = 0xe640, // 11 100 1100 1000 000 + ICH_AP0R1_EL2 = 0xe641, // 11 100 1100 1000 001 + ICH_AP0R2_EL2 = 0xe642, // 11 100 1100 1000 010 + ICH_AP0R3_EL2 = 0xe643, // 11 100 1100 1000 011 + ICH_AP1R0_EL2 = 0xe648, // 11 100 1100 1001 000 + ICH_AP1R1_EL2 = 0xe649, // 11 100 1100 1001 001 + ICH_AP1R2_EL2 = 0xe64a, // 11 100 1100 1001 010 + ICH_AP1R3_EL2 = 0xe64b, // 11 100 1100 1001 011 + ICH_HCR_EL2 = 0xe658, // 11 100 1100 1011 000 + ICH_MISR_EL2 = 0xe65a, // 11 100 1100 1011 010 + ICH_VMCR_EL2 = 0xe65f, // 11 100 1100 1011 111 + ICH_VSEIR_EL2 = 0xe64c, // 11 100 1100 1001 100 + ICH_LR0_EL2 = 0xe660, // 11 100 1100 1100 000 + ICH_LR1_EL2 = 0xe661, // 11 100 1100 1100 001 + ICH_LR2_EL2 = 0xe662, // 11 100 1100 1100 010 + ICH_LR3_EL2 = 0xe663, // 11 100 1100 1100 011 + ICH_LR4_EL2 = 0xe664, // 11 100 1100 1100 100 + ICH_LR5_EL2 = 0xe665, // 11 100 1100 1100 101 + ICH_LR6_EL2 = 0xe666, // 11 100 1100 1100 110 + ICH_LR7_EL2 = 0xe667, // 11 100 1100 1100 111 + ICH_LR8_EL2 = 0xe668, // 11 100 1100 1101 000 + ICH_LR9_EL2 = 0xe669, // 11 100 1100 1101 001 + ICH_LR10_EL2 = 0xe66a, // 11 100 1100 1101 010 + ICH_LR11_EL2 = 0xe66b, // 11 100 1100 1101 011 + ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100 + ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101 + ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110 + ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111 + }; + + // Note that these do not inherit from NamedImmMapper. This class is + // sufficiently different in its behaviour that I don't believe it's worth + // burdening the common NamedImmMapper with abstractions only needed in + // this one case. 
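+  //
+  // For example, DBGDTRRX_EL0 (read-only) and DBGDTRTX_EL0 (write-only) share
+  // the encoding 0x9828, so the MRS and MSR mappers each carry their own
+  // instruction-specific table on top of the shared SysRegPairs.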
+ struct SysRegMapper { + static const NamedImmMapper::Mapping SysRegPairs[]; + + const NamedImmMapper::Mapping *InstPairs; + size_t NumInstPairs; + + SysRegMapper() {} + uint32_t fromString(StringRef Name, bool &Valid) const; + std::string toString(uint32_t Bits, bool &Valid) const; + }; + + struct MSRMapper : SysRegMapper { + static const NamedImmMapper::Mapping MSRPairs[]; + MSRMapper(); + }; + + struct MRSMapper : SysRegMapper { + static const NamedImmMapper::Mapping MRSPairs[]; + MRSMapper(); + }; + + uint32_t ParseGenericRegister(StringRef Name, bool &Valid); +} + +namespace A64TLBI { + enum TLBIValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 + IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101 + VMALLE1IS = 0x4418, // 01 000 1000 0011 000 + ALLE2IS = 0x6418, // 01 100 1000 0011 000 + ALLE3IS = 0x7418, // 01 110 1000 0011 000 + VAE1IS = 0x4419, // 01 000 1000 0011 001 + VAE2IS = 0x6419, // 01 100 1000 0011 001 + VAE3IS = 0x7419, // 01 110 1000 0011 001 + ASIDE1IS = 0x441a, // 01 000 1000 0011 010 + VAAE1IS = 0x441b, // 01 000 1000 0011 011 + ALLE1IS = 0x641c, // 01 100 1000 0011 100 + VALE1IS = 0x441d, // 01 000 1000 0011 101 + VALE2IS = 0x641d, // 01 100 1000 0011 101 + VALE3IS = 0x741d, // 01 110 1000 0011 101 + VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110 + VAALE1IS = 0x441f, // 01 000 1000 0011 111 + IPAS2E1 = 0x6421, // 01 100 1000 0100 001 + IPAS2LE1 = 0x6425, // 01 100 1000 0100 101 + VMALLE1 = 0x4438, // 01 000 1000 0111 000 + ALLE2 = 0x6438, // 01 100 1000 0111 000 + ALLE3 = 0x7438, // 01 110 1000 0111 000 + VAE1 = 0x4439, // 01 000 1000 0111 001 + VAE2 = 0x6439, // 01 100 1000 0111 001 + VAE3 = 0x7439, // 01 110 1000 0111 001 + ASIDE1 = 0x443a, // 01 000 1000 0111 010 + VAAE1 = 0x443b, // 01 000 1000 0111 011 + ALLE1 = 0x643c, // 01 100 1000 0111 100 + VALE1 = 0x443d, // 01 000 1000 0111 101 + VALE2 = 0x643d, // 01 100 1000 0111 101 + VALE3 = 0x743d, // 01 110 1000 0111 101 + VMALLS12E1 = 0x643e, // 01 100 1000 0111 110 + VAALE1 = 0x443f // 01 000 1000 0111 111 + }; + + struct TLBIMapper : NamedImmMapper { + const static Mapping TLBIPairs[]; + + TLBIMapper(); + }; + + static inline bool NeedsRegister(TLBIValues Val) { + switch (Val) { + case VMALLE1IS: + case ALLE2IS: + case ALLE3IS: + case ALLE1IS: + case VMALLS12E1IS: + case VMALLE1: + case ALLE2: + case ALLE3: + case ALLE1: + case VMALLS12E1: + return false; + default: + return true; + } + } +} + +namespace AArch64II { + + enum TOF { + //===--------------------------------------------------------------===// + // AArch64 Specific MachineOperand flags. + + MO_NO_FLAG, + + // MO_GOT - Represents a relocation referring to the GOT entry of a given + // symbol. Used in adrp. + MO_GOT, + + // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the + // GOT entry of a given symbol. Used in ldr only. + MO_GOT_LO12, + + // MO_DTPREL_* - Represents a relocation referring to the offset from a + // module's dynamic thread pointer. Used in the local-dynamic TLS access + // model. + MO_DTPREL_G1, + MO_DTPREL_G0_NC, + + // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry + // providing the offset of a variable from the thread-pointer. Used in + // initial-exec TLS model where this offset is assigned in the static thread + // block and thus known by the dynamic linker. + MO_GOTTPREL, + MO_GOTTPREL_LO12, + + // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing + // a TLS descriptor chosen by the dynamic linker. 
Used for the
+    // general-dynamic and local-dynamic TLS access models where very little is
+    // known at link-time.
+    MO_TLSDESC,
+    MO_TLSDESC_LO12,
+
+    // MO_TPREL_* - Represents a relocation referring to the offset of a
+    // variable from the thread pointer itself. Used in the local-exec TLS
+    // access model.
+    MO_TPREL_G1,
+    MO_TPREL_G0_NC,
+
+    // MO_LO12 - On a symbol operand, this represents a relocation containing
+    // the lower 12 bits of the address. Used in add/sub/ldr/str.
+    MO_LO12
+  };
+}
+
+class APFloat;
+
+namespace A64Imms {
+  bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits);
+
+  inline bool isFPImm(const APFloat &Val) {
+    uint32_t Imm8;
+    return isFPImm(Val, Imm8);
+  }
+
+  bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits);
+  bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm);
+
+  bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+  bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+  // We sometimes want to know whether the immediate is representable with a
+  // MOVN but *not* with a MOVZ (because that would take priority).
+  bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+}
+
+} // end namespace llvm
+
+#endif
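A minimal sketch of how the immediate helpers declared above can be exercised, assuming an LLVM tree of this vintage where the header is reachable as "Utils/AArch64BaseInfo.h":

// Illustrative sketch only: round-trip a 64-bit bitmask immediate through the
// A64Imms helpers declared above. The include path is an assumption about the
// tree layout.
#include "Utils/AArch64BaseInfo.h"
#include <cassert>
#include <cstdint>

int main() {
  // 0x00ff00ff00ff00ff is a 16-bit element (eight low ones, no rotation)
  // replicated across the 64-bit datapath, so it is encodable with
  // N=0, immr=0, imms=0b100111.
  uint32_t Bits;
  bool Encodable = llvm::A64Imms::isLogicalImm(64, 0x00ff00ff00ff00ffULL, Bits);
  assert(Encodable && Bits == 0x027);

  // Decoding the 13-bit N:immr:imms field reproduces the original value.
  uint64_t Imm;
  bool Decodable = llvm::A64Imms::isLogicalImmBits(64, Bits, Imm);
  assert(Decodable && Imm == 0x00ff00ff00ff00ffULL);

  (void)Encodable;
  (void)Decodable;
  return 0;
}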