summary refs log tree commit diff stats
path: root/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h')
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h 113
1 file changed, 92 insertions, 21 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index f6adcea..d85aada 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -16,6 +16,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
+#include "AMDGPU.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -32,12 +34,15 @@ private:
/// compare.
SDValue getFFBH_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL) const;
+public:
+ static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op);
+
protected:
const AMDGPUSubtarget *Subtarget;
+ AMDGPUAS AMDGPUASI;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
@@ -47,7 +52,7 @@ protected:
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND32_16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
@@ -70,6 +75,8 @@ protected:
bool shouldCombineMemoryType(EVT VT) const;
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
unsigned Opc, SDValue LHS,
@@ -85,6 +92,7 @@ protected:
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
@@ -111,24 +119,22 @@ protected:
SmallVectorImpl<SDValue> &Results) const;
void analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
- void AnalyzeFormalArguments(CCState &State,
- const SmallVectorImpl<ISD::InputArg> &Ins) const;
- void AnalyzeReturn(CCState &State,
- const SmallVectorImpl<ISD::OutputArg> &Outs) const;
-
public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
bool mayIgnoreSignedZero(SDValue Op) const {
- if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only
+ if (getTargetMachine().Options.NoSignedZerosFPMath)
return true;
- if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
- return BO->Flags.hasNoSignedZeros();
+ const auto Flags = Op.getNode()->getFlags();
+ if (Flags.isDefined())
+ return Flags.hasNoSignedZeros();
return false;
}
+ static bool allUsesHaveSourceMods(const SDNode *N,
+ unsigned CostThreshold = 4);
bool isFAbsFree(EVT VT) const override;
bool isFNegFree(EVT VT) const override;
bool isTruncateFree(EVT Src, EVT Dest) const override;
@@ -158,6 +164,9 @@ public:
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
+ static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
@@ -174,7 +183,7 @@ public:
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
- SDValue CombineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
SDValue CC, DAGCombinerInfo &DCI) const;
@@ -196,21 +205,37 @@ public:
/// either zero or one and return them in the \p KnownZero and \p KnownOne
/// bitsets.
void computeKnownBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
+ KnownBits &Known,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
+ unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG,
unsigned Depth = 0) const override;
/// \brief Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
///
- /// \returns a RegisterSDNode representing Reg.
- virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
- const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const;
+ /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise
+ /// a copy from the register.
+ SDValue CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT,
+ const SDLoc &SL,
+ bool RawReg = false) const;
+ SDValue CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()));
+ }
+
+ // Returns the raw live in register rather than a copy from it.
+ SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true);
+ }
enum ImplicitParameter {
FIRST_IMPLICIT,
@@ -222,6 +247,14 @@ public:
/// type of implicit parameter.
uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
const ImplicitParameter Param) const;
+
+ AMDGPUAS getAMDGPUAS() const {
+ return AMDGPUASI;
+ }
+
+ MVT getFenceOperandTy(const DataLayout &DL) const override {
+ return MVT::i32;
+ }
};
namespace AMDGPUISD {
@@ -229,15 +262,35 @@ namespace AMDGPUISD {
enum NodeType : unsigned {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- CALL, // Function call based on a single integer
UMUL, // 32bit unsigned multiplication
BRANCH_COND,
// End AMDIL ISD Opcodes
+
+ // Function call.
+ CALL,
+ TRAP,
+
+ // Masked control flow nodes.
+ IF,
+ ELSE,
+ LOOP,
+
+ // A uniform kernel return that terminates the wavefront.
ENDPGM,
- RETURN,
+
+ // Return to a shader part's epilog code.
+ RETURN_TO_EPILOG,
+
+ // Return with values from a non-entry function.
+ RET_FLAG,
+
DWORDADDR,
FRACT,
+
+ /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output
+ /// modifier behavior with dx10_enable.
CLAMP,
+
// This is SETCC with the full mask result which is used for a compare with a
// result bit per item in the wavefront.
SETCC,
@@ -265,6 +318,9 @@ enum NodeType : unsigned {
DIV_SCALE,
DIV_FMAS,
DIV_FIXUP,
+ // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
+ // treated as an illegal operation.
+ FMAD_FTZ,
TRIG_PREOP, // 1 ULP max error for f64
// RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
@@ -301,7 +357,6 @@ enum NodeType : unsigned {
CONST_ADDRESS,
REGISTER_LOAD,
REGISTER_STORE,
- LOAD_INPUT,
SAMPLE,
SAMPLEB,
SAMPLED,
@@ -312,6 +367,18 @@ enum NodeType : unsigned {
CVT_F32_UBYTE1,
CVT_F32_UBYTE2,
CVT_F32_UBYTE3,
+
+ // Convert two float 32 numbers into a single register holding two packed f16
+ // with round to zero.
+ CVT_PKRTZ_F16_F32,
+
+ // Same as the standard node, except the high bits of the resulting integer
+ // are known 0.
+ FP_TO_FP16,
+
+ // Wrapper around fp16 results that are known to zero the high bits.
+ FP16_ZEXT,
+
/// This node is for VLIW targets and it is used to represent a vector
/// that is stored in consecutive registers with the same channel.
/// For example:
@@ -323,6 +390,8 @@ enum NodeType : unsigned {
BUILD_VERTICAL_VECTOR,
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
+ INIT_EXEC,
+ INIT_EXEC_FROM_INPUT,
SENDMSG,
SENDMSGHALT,
INTERP_MOV,
@@ -335,6 +404,8 @@ enum NodeType : unsigned {
STORE_MSKOR,
LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,
+ TBUFFER_STORE_FORMAT_X3,
+ TBUFFER_LOAD_FORMAT,
ATOMIC_CMP_SWAP,
ATOMIC_INC,
ATOMIC_DEC,
OpenPOWER on IntegriCloud