diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h | 151 |
1 files changed, 119 insertions, 32 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h index e68f6f9..3dd5bc8 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -19,6 +19,7 @@ #include "AMDGPUInstrInfo.h" #include "SIDefines.h" #include "SIRegisterInfo.h" +#include "llvm/ADT/SetVector.h" namespace llvm { @@ -38,6 +39,8 @@ private: EXECZ = 3 }; + typedef SmallSetVector<MachineInstr *, 32> SetVectorType; + static unsigned getBranchOpcode(BranchPredicate Cond); static BranchPredicate getBranchPredicate(unsigned Opcode); @@ -56,27 +59,30 @@ private: void swapOperands(MachineInstr &Inst) const; - void lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist, + void lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const; - void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist, + void splitScalar64BitUnaryOp(SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const; - void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist, + void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const; - void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, + void splitScalar64BitBCNT(SetVectorType &Worklist, MachineInstr &Inst) const; - void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, + void splitScalar64BitBFE(SetVectorType &Worklist, MachineInstr &Inst) const; + void movePackToVALU(SetVectorType &Worklist, + MachineRegisterInfo &MRI, + MachineInstr &Inst) const; void addUsersToMoveToVALUWorklist( unsigned Reg, MachineRegisterInfo &MRI, - SmallVectorImpl<MachineInstr *> &Worklist) const; + SetVectorType &Worklist) const; void addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst, - SmallVectorImpl<MachineInstr *> &Worklist) const; + SetVectorType &Worklist) const; const TargetRegisterClass * getDestEquivalentVGPRClass(const MachineInstr &Inst) const; @@ -97,6 +103,8 @@ protected: public: enum TargetOperandFlags { + MO_MASK = 0x7, + MO_NONE = 0, // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. MO_GOTPCREL = 1, @@ -140,6 +148,23 @@ public: RegScavenger *RS, unsigned TmpReg, unsigned Offset, unsigned Size) const; + void materializeImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL, + unsigned DestReg, + int64_t Value) const; + + const TargetRegisterClass *getPreferredSelectRegClass( + unsigned Size) const; + + unsigned insertNE(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned SrcReg, int Value) const; + + unsigned insertEQ(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned SrcReg, int Value) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -190,7 +215,7 @@ public: bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const override; + bool AllowModify = false) const override; unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr) const override; @@ -203,10 +228,29 @@ public: bool reverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const override; + + bool canInsertSelect(const MachineBasicBlock &MBB, + ArrayRef<MachineOperand> Cond, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, + int &TrueCycles, int &FalseCycles) const override; + + void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned DstReg, ArrayRef<MachineOperand> Cond, + unsigned TrueReg, unsigned FalseReg) const override; + + void insertVectorSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned DstReg, ArrayRef<MachineOperand> Cond, + unsigned TrueReg, unsigned FalseReg) const; + bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA = nullptr) const override; + bool isFoldableCopy(const MachineInstr &MI) const; + bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, MachineRegisterInfo *MRI) const final; @@ -308,6 +352,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::VOP3; } + static bool isSDWA(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SDWA; + } + + bool isSDWA(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::SDWA; + } + static bool isVOPC(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::VOPC; } @@ -420,6 +472,22 @@ public: return get(Opcode).TSFlags & SIInstrFlags::DPP; } + static bool isVOP3P(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VOP3P; + } + + bool isVOP3P(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::VOP3P; + } + + static bool isVINTRP(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VINTRP; + } + + bool isVINTRP(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::VINTRP; + } + static bool isScalarUnit(const MachineInstr &MI) { return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); } @@ -454,6 +522,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE; } + static bool hasFPClamp(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::HasFPClamp; + } + + bool hasFPClamp(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::HasFPClamp; + } + bool isVGPRCopy(const MachineInstr &MI) const { assert(MI.isCopy()); unsigned Dest = MI.getOperand(0).getReg(); @@ -462,28 +538,6 @@ public: return !RI.isSGPRReg(MRI, Dest); } - static int operandBitWidth(uint8_t OperandType) { - switch (OperandType) { - case AMDGPU::OPERAND_REG_IMM_INT32: - case AMDGPU::OPERAND_REG_IMM_FP32: - case AMDGPU::OPERAND_REG_INLINE_C_INT32: - case AMDGPU::OPERAND_REG_INLINE_C_FP32: - return 32; - case AMDGPU::OPERAND_REG_IMM_INT64: - case AMDGPU::OPERAND_REG_IMM_FP64: - case AMDGPU::OPERAND_REG_INLINE_C_INT64: - case AMDGPU::OPERAND_REG_INLINE_C_FP64: - return 64; - case AMDGPU::OPERAND_REG_INLINE_C_INT16: - case AMDGPU::OPERAND_REG_INLINE_C_FP16: - case AMDGPU::OPERAND_REG_IMM_INT16: - case AMDGPU::OPERAND_REG_IMM_FP16: - return 16; - default: - llvm_unreachable("unexpected operand type"); - } - } - bool isInlineConstant(const APInt &Imm) const; bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const; @@ -571,6 +625,7 @@ public: bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const; + bool hasAnyModifiersSet(const MachineInstr &MI) const; bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; @@ -598,13 +653,13 @@ public: return 4; } - return RI.getRegClass(OpInfo.RegClass)->getSize(); + return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8; } /// \brief This form should usually be preferred since it handles operands /// with unknown register classes. unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { - return getOpRegClass(MI, OpNo)->getSize(); + return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; } /// \returns true if it is legal for the operand at index \p OpNo @@ -677,6 +732,7 @@ public: void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + void insertReturn(MachineBasicBlock &MBB) const; /// \brief Return the number of wait states that result from executing this /// instruction. unsigned getNumWaitStates(const MachineInstr &MI) const; @@ -722,15 +778,40 @@ public: bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; + bool isNonUniformBranchInstr(MachineInstr &Instr) const; + + void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, + MachineBasicBlock *IfEnd) const; + + void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, + MachineBasicBlock *LoopEnd) const; + + std::pair<unsigned, unsigned> + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + ArrayRef<std::pair<int, const char *>> getSerializableTargetIndices() const override; + ArrayRef<std::pair<unsigned, const char *>> + getSerializableDirectMachineOperandTargetFlags() const override; + ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override; + + bool isBasicBlockPrologue(const MachineInstr &MI) const override; + + /// \brief Return a partially built integer add instruction without carry. + /// Caller must add source operands. + /// For pre-GFX9 it will generate unused carry destination operand. + /// TODO: After GFX9 it should return a no-carry operation. + MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, + unsigned DestReg) const; }; namespace AMDGPU { @@ -741,6 +822,12 @@ namespace AMDGPU { int getVOPe32(uint16_t Opcode); LLVM_READONLY + int getSDWAOp(uint16_t Opcode); + + LLVM_READONLY + int getBasicFromSDWAOp(uint16_t Opcode); + + LLVM_READONLY int getCommuteRev(uint16_t Opcode); LLVM_READONLY |