summary refs log tree commit diff stats
path: root/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp 501
1 files changed, 326 insertions, 175 deletions
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 68d557a..df4461e 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -39,18 +39,17 @@ namespace {
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
- const AMDGPUSubtarget &Subtarget;
+ const AMDGPUSubtarget *Subtarget;
public:
AMDGPUDAGToDAGISel(TargetMachine &TM);
virtual ~AMDGPUDAGToDAGISel();
-
+ bool runOnMachineFunction(MachineFunction &MF) override;
SDNode *Select(SDNode *N) override;
const char *getPassName() const override;
void PostprocessISelDAG() override;
private:
bool isInlineImmediate(SDNode *N) const;
- inline SDValue getSmallIPtrImm(unsigned Imm);
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
@@ -79,6 +78,8 @@ private:
bool isLocalLoad(const LoadSDNode *N) const;
bool isRegionLoad(const LoadSDNode *N) const;
+ SDNode *glueCopyToM0(SDNode *N) const;
+
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
@@ -95,9 +96,10 @@ private:
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &Offset) const;
+ SDValue &SOffset, SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &Offset,
+ SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
@@ -120,6 +122,11 @@ private:
SDNode *SelectADD_SUB_I64(SDNode *N);
SDNode *SelectDIV_SCALE(SDNode *N);
+ SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width);
+ SDNode *SelectS_BFEFromShifts(SDNode *N);
+ SDNode *SelectS_BFE(SDNode *N);
+
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
@@ -132,7 +139,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
}
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
- : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
+ : SelectionDAGISel(TM) {}
+
+bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
+ return SelectionDAGISel::runOnMachineFunction(MF);
}
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
@@ -156,7 +167,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
switch (N->getMachineOpcode()) {
default: {
const MCInstrDesc &Desc =
- TM.getSubtargetImpl()->getInstrInfo()->get(N->getMachineOpcode());
+ Subtarget->getInstrInfo()->get(N->getMachineOpcode());
unsigned OpIdx = Desc.getNumDefs() + OpNo;
if (OpIdx >= Desc.getNumOperands())
return nullptr;
@@ -164,42 +175,38 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
if (RegClass == -1)
return nullptr;
- return TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RegClass);
+ return Subtarget->getRegisterInfo()->getRegClass(RegClass);
}
case AMDGPU::REG_SEQUENCE: {
unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
const TargetRegisterClass *SuperRC =
- TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RCID);
+ Subtarget->getRegisterInfo()->getRegClass(RCID);
SDValue SubRegOp = N->getOperand(OpNo + 1);
unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
- return TM.getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
- SuperRC, SubRegIdx);
+ return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
+ SubRegIdx);
}
}
}
-SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
-}
-
bool AMDGPUDAGToDAGISel::SelectADDRParam(
SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
}
} else if (Addr.getOpcode() == ISD::ADD) {
R1 = Addr.getOperand(0);
R2 = Addr.getOperand(1);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
}
return true;
}
@@ -222,21 +229,47 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
}
} else if (Addr.getOpcode() == ISD::ADD) {
R1 = Addr.getOperand(0);
R2 = Addr.getOperand(1);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
}
return true;
}
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
+ AMDGPUAS::LOCAL_ADDRESS))
+ return N;
+
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ // Write max value to m0 before each local-address load, store or atomic
+
+ SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
+ CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+
+ SDValue Glue = M0.getValue(1);
+
+ SmallVector <SDValue, 8> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ Ops.push_back(Glue);
+ CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
+
+ return N;
+}
+
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
@@ -244,7 +277,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return nullptr; // Already selected.
}
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (isa<AtomicSDNode>(N))
+ N = glueCopyToM0(N);
+
switch (Opc) {
default: break;
// We are selecting i64 ADD here instead of custom lower it during
@@ -253,7 +288,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::ADD:
case ISD::SUB: {
if (N->getValueType(0) != MVT::i64 ||
- ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
return SelectADD_SUB_I64(N);
@@ -262,15 +297,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::BUILD_VECTOR: {
unsigned RegClassID;
- const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
- const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsEq(MVT::i32));
- if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
bool UseVReg = true;
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
U != E; ++U) {
@@ -281,7 +313,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
if (!RC) {
continue;
}
- if (SIRI->isSGPRClass(RC)) {
+ if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
UseVReg = false;
}
}
@@ -320,7 +352,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
}
- SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+ SDLoc DL(N);
+ SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
if (NumVectorElts == 1) {
return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
@@ -334,18 +367,19 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// 1 = Vector Register Class
SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
- RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+ RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
for (unsigned i = 0; i < NOps; i++) {
// XXX: Why is this here?
- if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ if (isa<RegisterSDNode>(N->getOperand(i))) {
IsRegSeq = false;
break;
}
RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
RegSeqArgs[1 + (2 * i) + 1] =
- CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
+ MVT::i32);
}
if (NOps != NumVectorElts) {
@@ -353,11 +387,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- SDLoc(N), EltVT);
+ DL, EltVT);
for (unsigned i = NOps; i < NumVectorElts; ++i) {
RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
RegSeqArgs[1 + (2 * i) + 1] =
- CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
}
}
@@ -368,30 +402,30 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
case ISD::BUILD_PAIR: {
SDValue RC, SubReg0, SubReg1;
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
break;
}
+ SDLoc DL(N);
if (N->getValueType(0) == MVT::i128) {
- RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
- SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
- SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
+ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
+ SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
} else if (N->getValueType(0) == MVT::i64) {
- RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
- SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
- SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
+ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
} else {
llvm_unreachable("Unhandled value type for BUILD_PAIR");
}
const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
N->getOperand(1), SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- SDLoc(N), N->getValueType(0), Ops);
+ DL, N->getValueType(0), Ops);
}
case ISD::Constant:
case ISD::ConstantFP: {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
break;
@@ -403,38 +437,46 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
Imm = C->getZExtValue();
}
- SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
- CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
- SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
- CurDAG->getConstant(Imm >> 32, MVT::i32));
+ SDLoc DL(N);
+ SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
+ MVT::i32));
+ SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
const SDValue Ops[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
- SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
- SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+ SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
N->getValueType(0), Ops);
}
case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
+ N = glueCopyToM0(N);
+ break;
+ }
+
// To simplify the TableGen patterns, we replace all i64 loads with
// v2i32 loads. Alternatively, we could promote i64 loads to v2i32
// during DAG legalization; however, some places (ExpandUnalignedLoad)
// in the DAG legalizer assume that if i64 is legal, so doing this
// promotion early can cause problems.
- EVT VT = N->getValueType(0);
- LoadSDNode *LD = cast<LoadSDNode>(N);
- if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
- break;
SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
- LD->getBasePtr(), LD->getMemOperand());
- SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ LD->getBasePtr(), LD->getMemOperand());
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
MVT::i64, NewLoad);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
- SelectCode(NewLoad.getNode());
+ SDNode *Load = glueCopyToM0(NewLoad.getNode());
+ SelectCode(Load);
N = BitCast.getNode();
break;
}
@@ -443,63 +485,68 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// Handle i64 stores here for the same reason mentioned above for loads.
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Value = ST->getValue();
- if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
- break;
+ if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
- SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
- MVT::v2i32, Value);
- SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
- ST->getBasePtr(), ST->getMemOperand());
+ SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ MVT::v2i32, Value);
+ SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
+ ST->getBasePtr(), ST->getMemOperand());
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+
+ if (NewValue.getOpcode() == ISD::BITCAST) {
+ Select(NewStore.getNode());
+ return SelectCode(NewValue.getNode());
+ }
- if (NewValue.getOpcode() == ISD::BITCAST) {
- Select(NewStore.getNode());
- return SelectCode(NewValue.getNode());
+ // getNode() may fold the bitcast if its input was another bitcast. If that
+ // happens we should only select the new store.
+ N = NewStore.getNode();
}
- // getNode() may fold the bitcast if its input was another bitcast. If that
- // happens we should only select the new store.
- N = NewStore.getNode();
+ N = glueCopyToM0(N);
break;
}
case AMDGPUISD::REGISTER_LOAD: {
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
SDValue Addr, Offset;
+ SDLoc DL(N);
SelectADDRIndirect(N->getOperand(1), Addr, Offset);
const SDValue Ops[] = {
Addr,
Offset,
- CurDAG->getTargetConstant(0, MVT::i32),
+ CurDAG->getTargetConstant(0, DL, MVT::i32),
N->getOperand(0),
};
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
- CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
+ CurDAG->getVTList(MVT::i32, MVT::i64,
+ MVT::Other),
Ops);
}
case AMDGPUISD::REGISTER_STORE: {
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
SDValue Addr, Offset;
SelectADDRIndirect(N->getOperand(2), Addr, Offset);
+ SDLoc DL(N);
const SDValue Ops[] = {
N->getOperand(1),
Addr,
Offset,
- CurDAG->getTargetConstant(0, MVT::i32),
+ CurDAG->getTargetConstant(0, DL, MVT::i32),
N->getOperand(0),
};
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
CurDAG->getVTList(MVT::Other),
Ops);
}
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
- if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
// There is a scalar version available, but unlike the vector version which
@@ -520,21 +567,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
bool Signed = Opc == AMDGPUISD::BFE_I32;
- // Transformation function, pack the offset and width of a BFE into
- // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
- // source, bits [5:0] contain the offset and bits [22:16] the width.
-
uint32_t OffsetVal = Offset->getZExtValue();
uint32_t WidthVal = Width->getZExtValue();
- uint32_t PackedVal = OffsetVal | WidthVal << 16;
-
- SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
- return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
- SDLoc(N),
- MVT::i32,
- N->getOperand(0),
- PackedOffsetWidth);
+ return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
+ N->getOperand(0), OffsetVal, WidthVal);
}
case AMDGPUISD::DIV_SCALE: {
@@ -548,6 +585,14 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
case ISD::ADDRSPACECAST:
return SelectAddrSpaceCast(N);
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SRA:
+ if (N->getValueType(0) != MVT::i32 ||
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ break;
+
+ return SelectS_BFE(N);
}
return SelectCode(N);
@@ -604,13 +649,11 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
}
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
- if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- N->getMemoryVT().bitsLT(MVT::i32)) {
+ if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ N->getMemoryVT().bitsLT(MVT::i32))
return true;
- }
- }
+
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
@@ -681,7 +724,8 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
SDValue& IntPtr) {
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
- IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
+ true);
return true;
}
return false;
@@ -691,7 +735,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
SDValue& BaseReg, SDValue &Offset) {
if (!isa<ConstantSDNode>(Addr)) {
BaseReg = Addr;
- Offset = CurDAG->getIntPtrConstant(0, true);
+ Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
return true;
}
return false;
@@ -706,7 +750,8 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
return true;
// If the pointer address is constant, we can move it to the offset field.
} else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
@@ -714,30 +759,32 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(CurDAG->getEntryNode()),
AMDGPU::ZERO, MVT::i32);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
return true;
}
// Default case, no offset
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
return true;
}
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
SDValue &Offset) {
ConstantSDNode *C;
+ SDLoc DL(Addr);
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else {
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
}
return true;
@@ -750,8 +797,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
bool IsAdd = (N->getOpcode() == ISD::ADD);
- SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
- SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL, MVT::i32, LHS, Sub0);
@@ -777,7 +824,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
SDValue Args[5] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
SDValue(AddLo,0),
Sub0,
SDValue(AddHi,0),
@@ -808,12 +855,11 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
unsigned OffsetBits) const {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
(OffsetBits == 8 && !isUInt<8>(Offset)))
return false;
- if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
return true;
// On Southern Islands instruction with a negative base value and an offset
@@ -835,15 +881,17 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
}
}
+ SDLoc DL(Addr);
+
// If we have a constant address, prefer to put the constant into the
// offset. This can save moves to load the constant address since multiple
// operations can share the zero base address register, and enables merging
// into read2 / write2 instructions.
if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
if (isUInt<16>(CAddr->getZExtValue())) {
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SDLoc(Addr), MVT::i32, Zero);
+ DL, MVT::i32, Zero);
Base = SDValue(MovZero, 0);
Offset = Addr;
return true;
@@ -852,13 +900,15 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
// default case
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
SDValue &Offset0,
SDValue &Offset1) const {
+ SDLoc DL(Addr);
+
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
@@ -868,8 +918,8 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
// (add n0, c0)
if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
Base = N0;
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
return true;
}
}
@@ -880,21 +930,21 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
assert(4 * DWordOffset0 == CAddr->getZExtValue());
if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MovZero
= CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SDLoc(Addr), MVT::i32, Zero);
+ DL, MVT::i32, Zero);
Base = SDValue(MovZero, 0);
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
return true;
}
}
// default case
Base = Addr;
- Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(1, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
return true;
}
@@ -910,62 +960,70 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &TFE) const {
SDLoc DL(Addr);
- GLC = CurDAG->getTargetConstant(0, MVT::i1);
- SLC = CurDAG->getTargetConstant(0, MVT::i1);
- TFE = CurDAG->getTargetConstant(0, MVT::i1);
+ GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
- Idxen = CurDAG->getTargetConstant(0, MVT::i1);
- Offen = CurDAG->getTargetConstant(0, MVT::i1);
- Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
- SOffset = CurDAG->getTargetConstant(0, MVT::i32);
+ Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (isLegalMUBUFImmOffset(C1)) {
-
- if (N0.getOpcode() == ISD::ADD) {
- // (add (add N2, N3), C1) -> addr64
- SDValue N2 = N0.getOperand(0);
- SDValue N3 = N0.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
- Ptr = N2;
- VAddr = N3;
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
- return;
- }
+ if (N0.getOpcode() == ISD::ADD) {
+ // (add (add N2, N3), C1) -> addr64
+ SDValue N2 = N0.getOperand(0);
+ SDValue N3 = N0.getOperand(1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ Ptr = N2;
+ VAddr = N3;
+ } else {
// (add N0, C1) -> offset
- VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = N0;
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ }
+
+ if (isLegalMUBUFImmOffset(C1)) {
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ return;
+ } else if (isUInt<32>(C1->getZExtValue())) {
+ // Illegal offset, store it in soffset.
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
+ 0);
return;
}
}
+
if (Addr.getOpcode() == ISD::ADD) {
// (add N0, N1) -> addr64
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
Ptr = N0;
VAddr = N1;
- Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return;
}
// default case -> offset
- VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr,
- SDValue &Offset) const {
- SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const {
+ SDValue Ptr, Offen, Idxen, Addr64;
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@@ -985,11 +1043,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
}
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &Offset,
- SDValue &SLC) const {
- SLC = CurDAG->getTargetConstant(0, MVT::i1);
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
+ SDValue GLC, TFE;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
@@ -999,7 +1059,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDLoc DL(Addr);
MachineFunction &MF = CurDAG->getMachineFunction();
const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
@@ -1017,11 +1077,11 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
const SDValue RsrcOps[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
ScratchRsrcDword0,
- CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
ScratchRsrcDword1,
- CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
};
SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
MVT::v2i32, RsrcOps), 0);
@@ -1036,14 +1096,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
if (isLegalMUBUFImmOffset(C1)) {
VAddr = Addr.getOperand(0);
- ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
// (node)
VAddr = Addr;
- ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
@@ -1053,7 +1113,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &TFE) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@@ -1087,7 +1147,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
SDLoc DL(N);
- assert(Subtarget.hasFlatAddressSpace() &&
+ assert(Subtarget->hasFlatAddressSpace() &&
"addrspacecast only supported with flat address space!");
assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
@@ -1116,7 +1176,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
DL,
DestVT,
Src,
- CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32));
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
}
@@ -1125,25 +1185,115 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
// FIXME: This is probably wrong, we should never be defining
// a register class with both VGPRs and SGPRs
- SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, MVT::i32);
+ SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
+ MVT::i32);
const SDValue Ops[] = {
RC,
Src,
- CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
- CurDAG->getConstant(0, MVT::i32)), 0),
- CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(0, DL, MVT::i32)), 0),
+ CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- SDLoc(N), N->getValueType(0), Ops);
+ DL, N->getValueType(0), Ops);
}
assert(SrcSize == 64 && DestSize == 64);
return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}
+SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width) {
+ // Transformation function: pack the offset and width of a BFE into the
+ // format expected by S_BFE_I32 / S_BFE_U32. In the second source operand,
+ // bits [5:0] contain the offset and bits [22:16] the width.
+ uint32_t PackedVal = Offset | (Width << 16);
+ SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
+}
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
+ // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
+ // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
+ // Predicate: 0 < b <= c < 32
+
+ const SDValue &Shl = N->getOperand(0);
+ ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (B && C) {
+ uint32_t BVal = B->getZExtValue();
+ uint32_t CVal = C->getZExtValue();
+
+ if (0 < BVal && BVal <= CVal && CVal < 32) {
+ bool Signed = N->getOpcode() == ISD::SRA;
+ unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+ return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
+ CVal - BVal, 32 - CVal);
+ }
+ }
+ return SelectCode(N);
+}
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::AND:
+ if (N->getOperand(0).getOpcode() == ISD::SRL) {
+ // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
+ // Predicate: isMask(mask)
+ const SDValue &Srl = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue();
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ }
+ break;
+ case ISD::SRL:
+ if (N->getOperand(0).getOpcode() == ISD::AND) {
+ // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
+ // Predicate: isMask(mask >> b)
+ const SDValue &And = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ } else if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ case ISD::SRA:
+ if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ }
+
+ return SelectCode(N);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
@@ -1161,7 +1311,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
Src = Src.getOperand(0);
}
- SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;
}
@@ -1169,9 +1319,10 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
+ SDLoc DL(In);
// FIXME: Handle Clamp and Omod
- Clamp = CurDAG->getTargetConstant(0, MVT::i32);
- Omod = CurDAG->getTargetConstant(0, MVT::i32);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
return SelectVOP3Mods(In, Src, SrcMods);
}
@@ -1180,7 +1331,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
SDValue &SrcMods,
SDValue &Omod) const {
// FIXME: Handle Omod
- Omod = CurDAG->getTargetConstant(0, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
return SelectVOP3Mods(In, Src, SrcMods);
}
@@ -1189,7 +1340,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
SDValue &SrcMods,
SDValue &Clamp,
SDValue &Omod) const {
- Clamp = Omod = CurDAG->getTargetConstant(0, MVT::i32);
+ Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
return SelectVOP3Mods(In, Src, SrcMods);
}
OpenPOWER on IntegriCloud