summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp403
1 files changed, 330 insertions, 73 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5bf347e..f235313 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -13,15 +13,15 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
-#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -67,10 +67,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const AMDGPUSubtarget *Subtarget;
+ AMDGPUAS AMDGPUASI;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel) {}
+ : SelectionDAGISel(TM, OptLevel){
+ AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
+ }
~AMDGPUDAGToDAGISel() override = default;
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -79,7 +82,8 @@ public:
void PostprocessISelDAG() override;
private:
- SDValue foldFrameIndex(SDValue N) const;
+ std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
+ bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
@@ -112,8 +116,13 @@ private:
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
- bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &ImmOffset) const;
+ bool SelectMUBUFScratchOffen(SDNode *Root,
+ SDValue Addr, SDValue &RSrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &ImmOffset) const;
+ bool SelectMUBUFScratchOffset(SDNode *Root,
+ SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset) const;
+
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
SDValue &Offset, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
@@ -129,8 +138,10 @@ private:
bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
SDValue &ImmOffset, SDValue &VOffset) const;
- bool SelectFlat(SDValue Addr, SDValue &VAddr,
- SDValue &SLC, SDValue &TFE) const;
+ bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, SDValue &SLC) const;
+ bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, SDValue &SLC) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
@@ -143,20 +154,28 @@ private:
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
+
+ bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Omod) const;
bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp,
SDValue &Omod) const;
+ bool SelectVOP3OMods(SDValue In, SDValue &Src,
+ SDValue &Clamp, SDValue &Omod) const;
+
+ bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp) const;
+
void SelectADD_SUB_I64(SDNode *N);
+ void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
@@ -187,6 +206,17 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return SelectionDAGISel::runOnMachineFunction(MF);
}
+bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
+ if (TM.Options.NoNaNsFPMath)
+ return true;
+
+ // TODO: Move into isKnownNeverNaN
+ if (N->getFlags().isDefined())
+ return N->getFlags().hasNoNaNs();
+
+ return CurDAG->isKnownNeverNaN(N);
+}
+
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
const SIInstrInfo *TII
= static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
@@ -250,7 +280,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
return N;
const SITargetLowering& Lowering =
@@ -290,6 +320,20 @@ static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
llvm_unreachable("invalid vector size");
}
+static bool getConstantValue(SDValue N, uint32_t &Out) {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ Out = C->getAPIntValue().getZExtValue();
+ return true;
+ }
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+ Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
+ return true;
+ }
+
+ return false;
+}
+
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
@@ -319,6 +363,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectADD_SUB_I64(N);
return;
}
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SelectUADDO_USUBO(N);
+ return;
+ }
case AMDGPUISD::FMUL_W_CHAIN: {
SelectFMUL_W_CHAIN(N);
return;
@@ -336,7 +385,24 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
+
+ if (VT == MVT::v2i16 || VT == MVT::v2f16) {
+ if (Opc == ISD::BUILD_VECTOR) {
+ uint32_t LHSVal, RHSVal;
+ if (getConstantValue(N->getOperand(0), LHSVal) &&
+ getConstantValue(N->getOperand(1), RHSVal)) {
+ uint32_t K = LHSVal | (RHSVal << 16);
+ CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
+ CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
+ return;
+ }
+ }
+
+ break;
+ }
+
assert(EltVT.bitsEq(MVT::i32));
+
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
} else {
@@ -502,7 +568,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::CopyToReg: {
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
- Lowering.legalizeTargetIndependentNode(N, *CurDAG);
+ N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
break;
}
case ISD::AND:
@@ -531,9 +597,9 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+ return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
+ return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
@@ -689,6 +755,17 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
CurDAG->RemoveDeadNode(N);
}
+void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
+ // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
+ // carry out despite the _i32 name. These were renamed in VI to _U32.
+ // FIXME: We should probably rename the opcodes here.
+ unsigned Opc = N->getOpcode() == ISD::UADDO ?
+ AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
+ { N->getOperand(0), N->getOperand(1) });
+}
+
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
SDLoc SL(N);
// src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
@@ -881,8 +958,12 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
return true;
}
+static bool isLegalMUBUFImmOffset(unsigned Imm) {
+ return isUInt<12>(Imm);
+}
+
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
- return isUInt<12>(Imm->getZExtValue());
+ return isLegalMUBUFImmOffset(Imm->getZExtValue());
}
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
@@ -998,43 +1079,111 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
-SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
- if (auto FI = dyn_cast<FrameIndexSDNode>(N))
- return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
- return N;
+static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
+ auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
+ return PSV && PSV->isStack();
+}
+
+std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
+ const MachineFunction &MF = CurDAG->getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
+ FI->getValueType(0));
+
+ // If we can resolve this to a frame index access, this is relative to the
+ // frame pointer SGPR.
+ return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
+ MVT::i32));
+ }
+
+ // If we don't know this private access is a local stack object, it needs to
+ // be relative to the entry point's scratch wave offset register.
+ return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
+ MVT::i32));
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
- SDValue &VAddr, SDValue &SOffset,
- SDValue &ImmOffset) const {
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root,
+ SDValue Addr, SDValue &Rsrc,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &ImmOffset) const {
SDLoc DL(Addr);
MachineFunction &MF = CurDAG->getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
- SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
- // (add n0, c1)
+ if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ unsigned Imm = CAddr->getZExtValue();
+ assert(!isLegalMUBUFImmOffset(Imm) &&
+ "should have been selected by other pattern");
+
+ SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
+ MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+ DL, MVT::i32, HighBits);
+ VAddr = SDValue(MovHighBits, 0);
+
+ // In a call sequence, stores to the argument stack area are relative to the
+ // stack pointer.
+ const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
+ unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
+ Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
+
+ SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
+ ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
+ return true;
+ }
+
if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ // (add n0, c1)
+
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
// Offsets in vaddr must be positive.
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
if (isLegalMUBUFImmOffset(C1)) {
- VAddr = foldFrameIndex(N0);
+ std::tie(VAddr, SOffset) = foldFrameIndex(N0);
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
// (node)
- VAddr = foldFrameIndex(Addr);
+ std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root,
+ SDValue Addr,
+ SDValue &SRsrc,
+ SDValue &SOffset,
+ SDValue &Offset) const {
+ ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
+ if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
+ return false;
+
+ SDLoc DL(Addr);
+ MachineFunction &MF = CurDAG->getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
+
+ const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
+ unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
+ Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
+
+ // FIXME: Get from MachinePointerInfo? We should only be using the frame
+ // offset if we know this is in a call sequence.
+ SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
+
+ Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset,
SDValue &GLC, SDValue &SLC,
@@ -1167,23 +1316,35 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
return true;
}
-bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
- SDValue &VAddr,
- SDValue &SLC,
- SDValue &TFE) const {
+bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ int64_t OffsetVal = 0;
+
+ if (Subtarget->hasFlatInstOffsets() &&
+ CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue();
+ if (isUInt<12>(COffsetVal)) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
+ }
+ }
+
VAddr = Addr;
- TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
+ Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
+ SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
+
return true;
}
-///
-/// \param EncodedOffset This is the immediate value that will be encoded
-/// directly into the instruction. On SI/CI the \p EncodedOffset
-/// will be in units of dwords and on VI+ it will be units of bytes.
-static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
- int64_t EncodedOffset) {
- return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
- isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
+bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ return SelectFlatOffset(Addr, VAddr, Offset, SLC);
}
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
@@ -1197,10 +1358,9 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDLoc SL(ByteOffsetNode);
AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
int64_t ByteOffset = C->getSExtValue();
- int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
- ByteOffset >> 2 : ByteOffset;
+ int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
- if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
+ if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
Imm = true;
return true;
@@ -1481,7 +1641,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
MemSDNode *Mem = cast<MemSDNode>(N);
unsigned AS = Mem->getAddressSpace();
- if (AS == AMDGPUAS::FLAT_ADDRESS) {
+ if (AS == AMDGPUASI.FLAT_ADDRESS) {
SelectCode(N);
return;
}
@@ -1545,7 +1705,6 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
-
Src = In;
if (Src.getOpcode() == ISD::FNEG) {
@@ -1559,42 +1718,29 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
}
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
-
return true;
}
-bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
- SDValue &SrcMods) const {
- bool Res = SelectVOP3Mods(In, Src, SrcMods);
- return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ SelectVOP3Mods(In, Src, SrcMods);
+ return isNoNanSrc(Src);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
+ if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
+ return false;
+
+ Src = In;
+ return true;
}
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
SDLoc DL(In);
- // FIXME: Handle Clamp and Omod
- Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
-
- return SelectVOP3Mods(In, Src, SrcMods);
-}
-
-bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
- SDValue &SrcMods, SDValue &Clamp,
- SDValue &Omod) const {
- bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
-
- return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
- cast<ConstantSDNode>(Clamp)->isNullValue() &&
- cast<ConstantSDNode>(Omod)->isNullValue();
-}
-
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
- SDValue &SrcMods,
- SDValue &Omod) const {
- // FIXME: Handle Omod
- Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
return SelectVOP3Mods(In, Src, SrcMods);
}
@@ -1607,6 +1753,117 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
+ SDValue &Clamp, SDValue &Omod) const {
+ Src = In;
+
+ SDLoc DL(In);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
+
+ return true;
+}
+
+static SDValue stripBitcast(SDValue Val) {
+ return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
+}
+
+// Figure out if this is really an extract of the high 16-bits of a dword.
+static bool isExtractHiElt(SDValue In, SDValue &Out) {
+ In = stripBitcast(In);
+ if (In.getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ SDValue Srl = In.getOperand(0);
+ if (Srl.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
+ if (ShiftAmt->getZExtValue() == 16) {
+ Out = stripBitcast(Srl.getOperand(0));
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+// Look through operations that obscure just looking at the low 16-bits of the
+// same register.
+static SDValue stripExtractLoElt(SDValue In) {
+ if (In.getOpcode() == ISD::TRUNCATE) {
+ SDValue Src = In.getOperand(0);
+ if (Src.getValueType().getSizeInBits() == 32)
+ return stripBitcast(Src);
+ }
+
+ return In;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ unsigned Mods = 0;
+ Src = In;
+
+ if (Src.getOpcode() == ISD::FNEG) {
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ Src = Src.getOperand(0);
+ }
+
+ if (Src.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned VecMods = Mods;
+
+ SDValue Lo = stripBitcast(Src.getOperand(0));
+ SDValue Hi = stripBitcast(Src.getOperand(1));
+
+ if (Lo.getOpcode() == ISD::FNEG) {
+ Lo = stripBitcast(Lo.getOperand(0));
+ Mods ^= SISrcMods::NEG;
+ }
+
+ if (Hi.getOpcode() == ISD::FNEG) {
+ Hi = stripBitcast(Hi.getOperand(0));
+ Mods ^= SISrcMods::NEG_HI;
+ }
+
+ if (isExtractHiElt(Lo, Lo))
+ Mods |= SISrcMods::OP_SEL_0;
+
+ if (isExtractHiElt(Hi, Hi))
+ Mods |= SISrcMods::OP_SEL_1;
+
+ Lo = stripExtractLoElt(Lo);
+ Hi = stripExtractLoElt(Hi);
+
+ if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
+ // Really a scalar input. Just select from the low half of the register to
+ // avoid packing.
+
+ Src = Lo;
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+ }
+
+ Mods = VecMods;
+ }
+
+ // Packed instructions do not have abs modifiers.
+ Mods |= SISrcMods::OP_SEL_1;
+
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
+ SDValue &SrcMods,
+ SDValue &Clamp) const {
+ SDLoc SL(In);
+
+ // FIXME: Handle clamp and op_sel
+ Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
+
+ return SelectVOP3PMods(In, Src, SrcMods);
+}
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
OpenPOWER on IntegriCloud