summary refs log tree commit diff stats
path: root/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 324
1 files changed, 192 insertions, 132 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 23c9352..5bf347e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -12,25 +12,48 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
-#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPURegisterInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <new>
+#include <vector>
using namespace llvm;
namespace llvm {
+
class R600InstrInfo; // Forward declaration; the selector class below refers to it through const pointers.
-}
+
+} // end namespace llvm
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
@@ -38,18 +61,6 @@ class R600InstrInfo;
namespace {
-static bool isCBranchSCC(const SDNode *N) {
- assert(N->getOpcode() == ISD::BRCOND);
- if (!N->hasOneUse())
- return false;
-
- SDValue Cond = N->getOperand(1);
- if (Cond.getOpcode() == ISD::CopyToReg)
- Cond = Cond.getOperand(2);
- return Cond.getOpcode() == ISD::SETCC &&
- Cond.getOperand(0).getValueType() == MVT::i32 && Cond.hasOneUse();
-}
-
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -58,16 +69,18 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
const AMDGPUSubtarget *Subtarget;
public:
- AMDGPUDAGToDAGISel(TargetMachine &TM);
- virtual ~AMDGPUDAGToDAGISel();
+ explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+ ~AMDGPUDAGToDAGISel() override = default;
+
bool runOnMachineFunction(MachineFunction &MF) override;
void Select(SDNode *N) override;
- const char *getPassName() const override;
- void PreprocessISelDAG() override;
+ StringRef getPassName() const override;
void PostprocessISelDAG() override;
private:
- bool isInlineImmediate(SDNode *N) const;
+ SDValue foldFrameIndex(SDValue N) const;
+ bool isInlineImmediate(const SDNode *N) const;
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
@@ -145,40 +158,46 @@ private:
void SelectADD_SUB_I64(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
+ void SelectFMA_W_CHAIN(SDNode *N);
+ void SelectFMUL_W_CHAIN(SDNode *N);
SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
uint32_t Offset, uint32_t Width);
void SelectS_BFEFromShifts(SDNode *N);
void SelectS_BFE(SDNode *N);
+ bool isCBranchSCC(const SDNode *N) const;
void SelectBRCOND(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
+
} // end anonymous namespace
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
- return new AMDGPUDAGToDAGISel(TM);
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new AMDGPUDAGToDAGISel(TM, OptLevel); // Heap-allocates the selector; TM and OptLevel are forwarded to its constructor.
}
-AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
- : SelectionDAGISel(TM) {}
-
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { // Per-function entry point: records the subtarget, then runs the base-class selector.
Subtarget = &MF.getSubtarget<AMDGPUSubtarget>(); // Stored in the Subtarget member so the selection helpers can query it.
return SelectionDAGISel::runOnMachineFunction(MF); // The return value comes straight from the base-class implementation.
}
-AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
-}
+bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { // True when N is a constant node whose value SIInstrInfo::isInlineConstant accepts.
+ const SIInstrInfo *TII
+ = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo(); // NOTE(review): unchecked downcast; assumes Subtarget is an SISubtarget here -- confirm.
+
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) // Integer constant: test its APInt value.
+ return TII->isInlineConstant(C->getAPIntValue());
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) // FP constant: test the bit pattern of its APFloat value.
+ return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
-bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
- const SITargetLowering *TL
- = static_cast<const SITargetLowering *>(getTargetLowering());
- return TL->analyzeImmediate(N) == 0;
+ return false; // Neither a ConstantSDNode nor a ConstantFPSDNode: not an inline immediate.
}
/// \brief Determine the register class for \p OpNo
@@ -187,8 +206,21 @@ bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
unsigned OpNo) const {
- if (!N->isMachineOpcode())
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
+ return MRI.getRegClass(Reg);
+ }
+
+ const SIRegisterInfo *TRI
+ = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
+ return TRI->getPhysRegClass(Reg);
+ }
+
return nullptr;
+ }
switch (N->getMachineOpcode()) {
default: {
@@ -244,7 +276,7 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
switch (NumVectorElts) {
case 1:
- return AMDGPU::SReg_32RegClassID;
+ return AMDGPU::SReg_32_XM0RegClassID;
case 2:
return AMDGPU::SReg_64RegClassID;
case 4:
@@ -275,7 +307,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
// DAG legalization, so we can fold some i64 ADDs used for address
// calculation into the LOAD and STORE instructions.
case ISD::ADD:
- case ISD::SUB: {
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::SUBC:
+ case ISD::SUBE: {
if (N->getValueType(0) != MVT::i64 ||
Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
@@ -283,6 +319,15 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectADD_SUB_I64(N);
return;
}
+ case AMDGPUISD::FMUL_W_CHAIN: {
+ SelectFMUL_W_CHAIN(N);
+ return;
+ }
+ case AMDGPUISD::FMA_W_CHAIN: {
+ SelectFMA_W_CHAIN(N);
+ return;
+ }
+
case ISD::SCALAR_TO_VECTOR:
case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::BUILD_VECTOR: {
@@ -498,7 +543,7 @@ bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
Term->getMetadata("structurizecfg.uniform");
}
-const char *AMDGPUDAGToDAGISel::getPassName() const {
+StringRef AMDGPUDAGToDAGISel::getPassName() const { // Returns the fixed display name of this pass.
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
@@ -563,6 +608,10 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
+ Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
Base = Addr.getOperand(0);
@@ -580,7 +629,12 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- bool IsAdd = (N->getOpcode() == ISD::ADD);
+ unsigned Opcode = N->getOpcode();
+ bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
+ bool ProduceCarry =
+ ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
+ bool IsAdd =
+ (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);
SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
@@ -596,25 +650,70 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
DL, MVT::i32, RHS, Sub1);
SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
- SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
- SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
- SDValue Carry(AddLo, 1);
- SDNode *AddHi
- = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
- SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
+ SDNode *AddLo;
+ if (!ConsumeCarry) {
+ SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
+ AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
+ } else {
+ SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
+ AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
+ }
+ SDValue AddHiArgs[] = {
+ SDValue(Hi0, 0),
+ SDValue(Hi1, 0),
+ SDValue(AddLo, 1)
+ };
+ SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
- SDValue Args[5] = {
+ SDValue RegSequenceArgs[] = {
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
SDValue(AddLo,0),
Sub0,
SDValue(AddHi,0),
Sub1,
};
- CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
+ SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::i64, RegSequenceArgs);
+
+ if (ProduceCarry) {
+ // Replace the carry-use
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
+ }
+
+ // Replace the remaining uses.
+ CurDAG->ReplaceAllUsesWith(N, RegSequence);
+ CurDAG->RemoveDeadNode(N);
+}
+
+void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { // Selects an AMDGPUISD::FMA_W_CHAIN node to V_FMA_F32 via SelectNodeTo.
+ SDLoc SL(N); // NOTE(review): SL is not referenced again in this function as shown.
+ // Ops[0..7]: src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+ SDValue Ops[10];
+
+ SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]); // Operand 1 fills src0, src0_modifiers, clamp and omod.
+ SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); // Operand 2 fills src1 and src1_modifiers.
+ SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]); // Operand 3 fills src2 and src2_modifiers.
+ Ops[8] = N->getOperand(0); // Passed through unchanged; presumably the chain -- confirm.
+ Ops[9] = N->getOperand(4); // Passed through unchanged; presumably the glue -- confirm.
+
+ CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops); // Result types are taken unchanged from N.
+}
+
+void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) { // Selects an AMDGPUISD::FMUL_W_CHAIN node to V_MUL_F32_e64 via SelectNodeTo.
+ SDLoc SL(N); // NOTE(review): SL is not referenced again in this function as shown.
+ // Ops[0..5]: src0_modifiers, src0, src1_modifiers, src1, clamp, omod
+ SDValue Ops[8];
+
+ SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]); // Operand 1 fills src0, src0_modifiers, clamp and omod.
+ SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); // Operand 2 fills src1 and src1_modifiers.
+ Ops[6] = N->getOperand(0); // Passed through unchanged; presumably the chain -- confirm.
+ Ops[7] = N->getOperand(3); // Passed through unchanged; presumably the glue -- confirm.
+
+ CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops); // Result types are taken unchanged from N.
}
// We need to handle this here because tablegen doesn't support matching
@@ -628,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
- // omod
- SDValue Ops[8];
-
- SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
- SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
- SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
- CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
@@ -779,6 +872,9 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
}
// default case
+
+ // FIXME: This is broken on SI where we still need to check if the base
+ // pointer is positive here.
Base = Addr;
Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
@@ -825,7 +921,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Ptr = N2;
VAddr = N3;
} else {
-
// (add N0, C1) -> offset
VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = N0;
@@ -903,6 +998,12 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
+SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { // Rewrites a FrameIndexSDNode as a target frame index; any other value passes through.
+ if (auto FI = dyn_cast<FrameIndexSDNode>(N)) // N is a frame index node.
+ return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); // Same index and value type, rebuilt through getTargetFrameIndex.
+ return N; // Not a frame index: returned untouched.
+}
+
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &ImmOffset) const {
@@ -922,14 +1023,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
// Offsets in vaddr must be positive.
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
if (isLegalMUBUFImmOffset(C1)) {
- VAddr = N0;
+ VAddr = foldFrameIndex(N0);
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
// (node)
- VAddr = Addr;
+ VAddr = foldFrameIndex(Addr);
ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
@@ -1122,7 +1223,6 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
SDValue &Offset, bool &Imm) const {
-
SDLoc SL(Addr);
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
@@ -1327,36 +1427,53 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
SelectCode(N);
}
+bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { // Tells SelectBRCOND whether BRCOND node N takes its S_CBRANCH_SCC* path.
+ assert(N->getOpcode() == ISD::BRCOND);
+ if (!N->hasOneUse()) // Only a BRCOND with exactly one use qualifies.
+ return false;
+
+ SDValue Cond = N->getOperand(1); // Operand 1 is the branch condition.
+ if (Cond.getOpcode() == ISD::CopyToReg) // Look through a CopyToReg to its operand 2.
+ Cond = Cond.getOperand(2);
+
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) // The condition must be a SETCC with a single use.
+ return false;
+
+ MVT VT = Cond.getOperand(0).getSimpleValueType(); // Type of the SETCC's first operand.
+ if (VT == MVT::i32) // An i32 compare always qualifies.
+ return true;
+
+ if (VT == MVT::i64) { // An i64 compare qualifies only in the case below.
+ auto ST = static_cast<const SISubtarget *>(Subtarget); // NOTE(review): unchecked downcast; assumes an SISubtarget -- confirm.
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); // Operand 2 of the SETCC carries the condition code.
+ return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64(); // SETEQ/SETNE only, and only if the subtarget reports hasScalarCompareEq64().
+ }
+
+ return false; // Any other compare type does not qualify.
+}
+
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { // Selects a BRCOND node: SI_BR_UNDEF, the generated matcher, or S_CBRANCH_VCCNZ.
SDValue Cond = N->getOperand(1);
+ if (Cond.isUndef()) { // Undefined condition: select SI_BR_UNDEF with operand 2 (the block) and operand 0.
+ CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
+ N->getOperand(2), N->getOperand(0));
+ return;
+ }
+
if (isCBranchSCC(N)) {
// This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
SelectCode(N);
return;
}
- // The result of VOPC instructions is or'd against ~EXEC before it is
- // written to vcc or another SGPR. This means that the value '1' is always
- // written to the corresponding bit for results that are masked. In order
- // to correctly check against vccz, we need to and VCC with the EXEC
- // register in order to clear the value from the masked bits.
-
SDLoc SL(N);
- SDNode *MaskedCond =
- CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
- CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
- Cond);
- SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
- SDValue(MaskedCond, 0),
- SDValue()); // Passing SDValue() adds a
- // glue output.
+ SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond); // Copies the condition into VCC as-is; the branch below consumes result 0 of the copy.
CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
N->getOperand(2), // Basic Block
- VCC.getValue(0), // Chain
- VCC.getValue(1)); // Glue
- return;
+ VCC.getValue(0)); // Result 0 of the CopyToReg built above.
}
// This is here because there isn't a way to use the generated sub0_sub1 as the
@@ -1427,7 +1544,6 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
-
unsigned Mods = 0;
Src = In;
@@ -1491,62 +1607,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
-void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
- MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo();
-
- // Handle the perverse case where a frame index is being stored. We don't
- // want to see multiple frame index operands on the same instruction since
- // it complicates things and violates some assumptions about frame index
- // lowering.
- for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd();
- I != E; ++I) {
- SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);
-
- // It's possible that we have a frame index defined in the function that
- // isn't used in this block.
- if (FI.use_empty())
- continue;
-
- // Skip over the AssertZext inserted during lowering.
- SDValue EffectiveFI = FI;
- auto It = FI->use_begin();
- if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
- EffectiveFI = SDValue(*It, 0);
- It = EffectiveFI->use_begin();
- }
-
- for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
- SDUse &Use = It.getUse();
- SDNode *User = Use.getUser();
- unsigned OpIdx = It.getOperandNo();
- ++It;
-
- if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
- unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
- if (OpIdx == PtrIdx)
- continue;
-
- unsigned OpN = M->getNumOperands();
- SDValue NewOps[8];
-
- assert(OpN < array_lengthof(NewOps));
- for (unsigned Op = 0; Op != OpN; ++Op) {
- if (Op != OpIdx) {
- NewOps[Op] = M->getOperand(Op);
- continue;
- }
-
- MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SDLoc(M), MVT::i32, FI);
- NewOps[Op] = SDValue(Mov, 0);
- }
-
- CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
- }
- }
- }
-}
-
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
OpenPOWER on IntegriCloud