diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 113 |
1 files changed, 92 insertions, 21 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index f6adcea..d85aada 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -16,6 +16,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H +#include "AMDGPU.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/Target/TargetLowering.h" namespace llvm { @@ -32,12 +34,15 @@ private: /// compare. SDValue getFFBH_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL) const; +public: + static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op); + protected: const AMDGPUSubtarget *Subtarget; + AMDGPUAS AMDGPUASI; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; /// \brief Split a vector store into multiple scalar stores. /// \returns The resulting chain. @@ -47,7 +52,7 @@ protected: SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFROUND32_16(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; @@ -70,6 +75,8 @@ protected: bool shouldCombineMemoryType(EVT VT) const; SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, @@ -85,6 +92,7 @@ protected: SDValue RHS, DAGCombinerInfo &DCI) const; SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const; static EVT getEquivalentMemType(LLVMContext &Context, EVT VT); @@ -111,24 +119,22 @@ protected: SmallVectorImpl<SDValue> &Results) const; void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl<ISD::InputArg> &Ins) const; - void AnalyzeFormalArguments(CCState &State, - const SmallVectorImpl<ISD::InputArg> &Ins) const; - void AnalyzeReturn(CCState &State, - const SmallVectorImpl<ISD::OutputArg> &Outs) const; - public: AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); bool mayIgnoreSignedZero(SDValue Op) const { - if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only + if (getTargetMachine().Options.NoSignedZerosFPMath) return true; - if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op)) - return BO->Flags.hasNoSignedZeros(); + const auto Flags = Op.getNode()->getFlags(); + if (Flags.isDefined()) + return Flags.hasNoSignedZeros(); return false; } + static bool allUsesHaveSourceMods(const SDNode *N, + unsigned CostThreshold = 4); bool isFAbsFree(EVT VT) const override; bool isFNegFree(EVT VT) const override; bool isTruncateFree(EVT Src, EVT Dest) const override; @@ -158,6 +164,9 @@ public: bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); + static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, @@ -174,7 +183,7 @@ public: SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const override; - SDValue CombineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const; @@ -196,21 +205,37 @@ public: /// either zero or one and return them in the \p KnownZero and \p KnownOne /// bitsets. void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; - unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG, + unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, + const SelectionDAG &DAG, unsigned Depth = 0) const override; /// \brief Helper function that adds Reg to the LiveIn list of the DAG's /// MachineFunction. /// - /// \returns a RegisterSDNode representing Reg. - virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, - const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const; + /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise + /// a copy from the register. + SDValue CreateLiveInRegister(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT, + const SDLoc &SL, + bool RawReg = false) const; + SDValue CreateLiveInRegister(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const { + return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode())); + } + + // Returns the raw live in register rather than a copy from it. + SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const { + return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true); + } enum ImplicitParameter { FIRST_IMPLICIT, @@ -222,6 +247,14 @@ public: /// type of implicit parameter. uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const; + + AMDGPUAS getAMDGPUAS() const { + return AMDGPUASI; + } + + MVT getFenceOperandTy(const DataLayout &DL) const override { + return MVT::i32; + } }; namespace AMDGPUISD { @@ -229,15 +262,35 @@ namespace AMDGPUISD { enum NodeType : unsigned { // AMDIL ISD Opcodes FIRST_NUMBER = ISD::BUILTIN_OP_END, - CALL, // Function call based on a single integer UMUL, // 32bit unsigned multiplication BRANCH_COND, // End AMDIL ISD Opcodes + + // Function call. + CALL, + TRAP, + + // Masked control flow nodes. + IF, + ELSE, + LOOP, + + // A uniform kernel return that terminates the wavefront. ENDPGM, - RETURN, + + // Return to a shader part's epilog code. + RETURN_TO_EPILOG, + + // Return with values from a non-entry function. + RET_FLAG, + DWORDADDR, FRACT, + + /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output + /// modifier behavior with dx10_enable. CLAMP, + // This is SETCC with the full mask result which is used for a compare with a // result bit per item in the wavefront. SETCC, @@ -265,6 +318,9 @@ enum NodeType : unsigned { DIV_SCALE, DIV_FMAS, DIV_FIXUP, + // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is + // treated as an illegal operation. + FMAD_FTZ, TRIG_PREOP, // 1 ULP max error for f64 // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. @@ -301,7 +357,6 @@ enum NodeType : unsigned { CONST_ADDRESS, REGISTER_LOAD, REGISTER_STORE, - LOAD_INPUT, SAMPLE, SAMPLEB, SAMPLED, @@ -312,6 +367,18 @@ enum NodeType : unsigned { CVT_F32_UBYTE1, CVT_F32_UBYTE2, CVT_F32_UBYTE3, + + // Convert two float 32 numbers into a single register holding two packed f16 + // with round to zero. + CVT_PKRTZ_F16_F32, + + // Same as the standard node, except the high bits of the resulting integer + // are known 0. + FP_TO_FP16, + + // Wrapper around fp16 results that are known to zero the high bits. + FP16_ZEXT, + /// This node is for VLIW targets and it is used to represent a vector /// that is stored in consecutive registers with the same channel. /// For example: @@ -323,6 +390,8 @@ enum NodeType : unsigned { BUILD_VERTICAL_VECTOR, /// Pointer to the start of the shader's constant data. CONST_DATA_PTR, + INIT_EXEC, + INIT_EXEC_FROM_INPUT, SENDMSG, SENDMSGHALT, INTERP_MOV, @@ -335,6 +404,8 @@ enum NodeType : unsigned { STORE_MSKOR, LOAD_CONSTANT, TBUFFER_STORE_FORMAT, + TBUFFER_STORE_FORMAT_X3, + TBUFFER_LOAD_FORMAT, ATOMIC_CMP_SWAP, ATOMIC_INC, ATOMIC_DEC, |