Diffstat (limited to 'contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 501
1 file changed, 326 insertions(+), 175 deletions(-)
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 68d557a..df4461e 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -39,18 +39,17 @@ namespace {
 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
   // make the right decision when generating code for different targets.
-  const AMDGPUSubtarget &Subtarget;
+  const AMDGPUSubtarget *Subtarget;
 public:
   AMDGPUDAGToDAGISel(TargetMachine &TM);
   virtual ~AMDGPUDAGToDAGISel();
-
+  bool runOnMachineFunction(MachineFunction &MF) override;
   SDNode *Select(SDNode *N) override;
   const char *getPassName() const override;
   void PostprocessISelDAG() override;
 
 private:
   bool isInlineImmediate(SDNode *N) const;
-  inline SDValue getSmallIPtrImm(unsigned Imm);
   bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                    const R600InstrInfo *TII);
   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
@@ -79,6 +78,8 @@ private:
   bool isLocalLoad(const LoadSDNode *N) const;
   bool isRegionLoad(const LoadSDNode *N) const;
 
+  SDNode *glueCopyToM0(SDNode *N) const;
+
   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
@@ -95,9 +96,10 @@ private:
                          SDValue &Idxen, SDValue &Addr64, SDValue &GLC,
                          SDValue &SLC, SDValue &TFE) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
-                         SDValue &Offset) const;
+                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
+                         SDValue &SLC, SDValue &TFE) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
-                         SDValue &VAddr, SDValue &Offset,
+                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                          SDValue &SLC) const;
   bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                           SDValue &SOffset, SDValue &ImmOffset) const;
@@ -120,6 +122,11 @@ private:
   SDNode *SelectADD_SUB_I64(SDNode *N);
   SDNode *SelectDIV_SCALE(SDNode *N);
 
+  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+                   uint32_t Offset, uint32_t Width);
+  SDNode *SelectS_BFEFromShifts(SDNode *N);
+  SDNode *SelectS_BFE(SDNode *N);
+
   // Include the pieces autogenerated from the target description.
 #include "AMDGPUGenDAGISel.inc"
 };
@@ -132,7 +139,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
 }
 
 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
-    : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
+    : SelectionDAGISel(TM) {}
+
+bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
+  return SelectionDAGISel::runOnMachineFunction(MF);
 }
 
 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
@@ -156,7 +167,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
   switch (N->getMachineOpcode()) {
   default: {
     const MCInstrDesc &Desc =
-        TM.getSubtargetImpl()->getInstrInfo()->get(N->getMachineOpcode());
+        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
     unsigned OpIdx = Desc.getNumDefs() + OpNo;
     if (OpIdx >= Desc.getNumOperands())
       return nullptr;
@@ -164,42 +175,38 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
     if (RegClass == -1)
       return nullptr;
 
-    return TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RegClass);
+    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
   }
   case AMDGPU::REG_SEQUENCE: {
     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
     const TargetRegisterClass *SuperRC =
-        TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RCID);
+        Subtarget->getRegisterInfo()->getRegClass(RCID);
 
     SDValue SubRegOp = N->getOperand(OpNo + 1);
     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
-    return TM.getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
-        SuperRC, SubRegIdx);
+    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
+                                                               SubRegIdx);
   }
   }
 }
 
-SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
-  return CurDAG->getTargetConstant(Imm, MVT::i32);
-}
-
 bool AMDGPUDAGToDAGISel::SelectADDRParam(
   SDValue Addr, SDValue& R1, SDValue& R2) {
 
   if (Addr.getOpcode() == ISD::FrameIndex) {
     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
-      R2 = CurDAG->getTargetConstant(0, MVT::i32);
+      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
     } else {
       R1 = Addr;
-      R2 = CurDAG->getTargetConstant(0, MVT::i32);
+      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
     }
   } else if (Addr.getOpcode() == ISD::ADD) {
     R1 = Addr.getOperand(0);
     R2 = Addr.getOperand(1);
   } else {
     R1 = Addr;
-    R2 = CurDAG->getTargetConstant(0, MVT::i32);
+    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
   }
 
   return true;
 }
@@ -222,21 +229,47 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
   if (Addr.getOpcode() == ISD::FrameIndex) {
     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
-      R2 = CurDAG->getTargetConstant(0, MVT::i64);
+      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
     } else {
       R1 = Addr;
-      R2 = CurDAG->getTargetConstant(0, MVT::i64);
+      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
     }
   } else if (Addr.getOpcode() == ISD::ADD) {
     R1 = Addr.getOperand(0);
     R2 = Addr.getOperand(1);
   } else {
     R1 = Addr;
-    R2 = CurDAG->getTargetConstant(0, MVT::i64);
+    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
   }
 
   return true;
 }
 
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
+  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
+                 AMDGPUAS::LOCAL_ADDRESS))
+    return N;
+
+  const SITargetLowering& Lowering =
+      *static_cast<const SITargetLowering*>(getTargetLowering());
+
+  // Write max value to m0 before each load operation
+
+  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
+                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+
+  SDValue Glue = M0.getValue(1);
+
+  SmallVector <SDValue, 8> Ops;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    Ops.push_back(N->getOperand(i));
+  }
+  Ops.push_back(Glue);
+  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
+
+  return N;
+}
+
 SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   unsigned int Opc = N->getOpcode();
   if (N->isMachineOpcode()) {
@@ -244,7 +277,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     return nullptr;   // Already selected.
   }
 
-  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+  if (isa<AtomicSDNode>(N))
+    N = glueCopyToM0(N);
+
   switch (Opc) {
   default: break;
   // We are selecting i64 ADD here instead of custom lower it during
@@ -253,7 +288,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   case ISD::ADD:
   case ISD::SUB: {
     if (N->getValueType(0) != MVT::i64 ||
-        ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
       break;
 
     return SelectADD_SUB_I64(N);
@@ -262,15 +297,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   case AMDGPUISD::BUILD_VERTICAL_VECTOR:
   case ISD::BUILD_VECTOR: {
     unsigned RegClassID;
-    const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo *>(
-        TM.getSubtargetImpl()->getRegisterInfo());
-    const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo *>(
-        TM.getSubtargetImpl()->getRegisterInfo());
+    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
     EVT VT = N->getValueType(0);
     unsigned NumVectorElts = VT.getVectorNumElements();
     EVT EltVT = VT.getVectorElementType();
     assert(EltVT.bitsEq(MVT::i32));
-    if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
       bool UseVReg = true;
       for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
                                                     U != E; ++U) {
@@ -281,7 +313,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
         if (!RC) {
           continue;
         }
-        if (SIRI->isSGPRClass(RC)) {
+        if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
           UseVReg = false;
         }
       }
@@ -320,7 +352,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
       }
     }
 
-    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+    SDLoc DL(N);
+    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
 
     if (NumVectorElts == 1) {
       return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
@@ -334,18 +367,19 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     // 1 = Vector Register Class
     SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
 
-    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
     bool IsRegSeq = true;
     unsigned NOps = N->getNumOperands();
     for (unsigned i = 0; i < NOps; i++) {
       // XXX: Why is this here?
-      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+      if (isa<RegisterSDNode>(N->getOperand(i))) {
         IsRegSeq = false;
         break;
       }
       RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
       RegSeqArgs[1 + (2 * i) + 1] =
-              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
+              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
+                                        MVT::i32);
     }
 
     if (NOps != NumVectorElts) {
@@ -353,11 +387,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
       assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
       MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
-                                                     SDLoc(N), EltVT);
+                                                     DL, EltVT);
       for (unsigned i = NOps; i < NumVectorElts; ++i) {
         RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
         RegSeqArgs[1 + (2 * i) + 1] =
-          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
+          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
       }
     }
 
@@ -368,30 +402,30 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   }
   case ISD::BUILD_PAIR: {
     SDValue RC, SubReg0, SubReg1;
-    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
       break;
     }
+    SDLoc DL(N);
     if (N->getValueType(0) == MVT::i128) {
-      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
-      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
-      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
+      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
+      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
+      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
     } else if (N->getValueType(0) == MVT::i64) {
-      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
-      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
-      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
+      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
     } else {
       llvm_unreachable("Unhandled value type for BUILD_PAIR");
     }
     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                             N->getOperand(1), SubReg1 };
     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
-                                  SDLoc(N), N->getValueType(0), Ops);
+                                  DL, N->getValueType(0), Ops);
   }
 
   case ISD::Constant:
   case ISD::ConstantFP: {
-    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
         N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
       break;
 
@@ -403,38 +437,46 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
       Imm = C->getZExtValue();
     }
 
-    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
-                                CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
-    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
-                                CurDAG->getConstant(Imm >> 32, MVT::i32));
+    SDLoc DL(N);
+    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
+                                                    MVT::i32));
+    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
     const SDValue Ops[] = {
-      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
-      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
-      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
     };
 
-    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
+    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                   N->getValueType(0), Ops);
   }
   case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    SDLoc SL(N);
+    EVT VT = N->getValueType(0);
+
+    if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
+      N = glueCopyToM0(N);
+      break;
+    }
+
     // To simplify the TableGen patters, we replace all i64 loads with
     // v2i32 loads.  Alternatively, we could promote i64 loads to v2i32
     // during DAG legalization, however, so places (ExpandUnalignedLoad)
     // in the DAG legalizer assume that if i64 is legal, so doing this
     // promotion early can cause problems.
-    EVT VT = N->getValueType(0);
-    LoadSDNode *LD = cast<LoadSDNode>(N);
-    if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
-      break;
 
     SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
-                                      LD->getBasePtr(), LD->getMemOperand());
-    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+                                      LD->getBasePtr(), LD->getMemOperand());
+    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                       MVT::i64, NewLoad);
     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
-    SelectCode(NewLoad.getNode());
+    SDNode *Load = glueCopyToM0(NewLoad.getNode());
+    SelectCode(Load);
     N = BitCast.getNode();
     break;
   }
@@ -443,63 +485,68 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     // Handle i64 stores here for the same reason mentioned above for loads.
     StoreSDNode *ST = cast<StoreSDNode>(N);
     SDValue Value = ST->getValue();
-    if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
-      break;
+    if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
 
-    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
-                                       MVT::v2i32, Value);
-    SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
-                                        ST->getBasePtr(), ST->getMemOperand());
+      SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+                                         MVT::v2i32, Value);
+      SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
+                                          ST->getBasePtr(), ST->getMemOperand());
 
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+
+      if (NewValue.getOpcode() == ISD::BITCAST) {
+        Select(NewStore.getNode());
+        return SelectCode(NewValue.getNode());
+      }
 
-    if (NewValue.getOpcode() == ISD::BITCAST) {
-      Select(NewStore.getNode());
-      return SelectCode(NewValue.getNode());
+      // getNode() may fold the bitcast if its input was another bitcast.  If that
+      // happens we should only select the new store.
+      N = NewStore.getNode();
     }
 
-    // getNode() may fold the bitcast if its input was another bitcast.  If that
-    // happens we should only select the new store.
-    N = NewStore.getNode();
+    N = glueCopyToM0(N);
     break;
   }
 
   case AMDGPUISD::REGISTER_LOAD: {
-    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
       break;
     SDValue Addr, Offset;
 
+    SDLoc DL(N);
     SelectADDRIndirect(N->getOperand(1), Addr, Offset);
     const SDValue Ops[] = {
       Addr,
      Offset,
-      CurDAG->getTargetConstant(0, MVT::i32),
+      CurDAG->getTargetConstant(0, DL, MVT::i32),
      N->getOperand(0),
    };
-    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
-                                  CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
+    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
+                                  CurDAG->getVTList(MVT::i32, MVT::i64,
+                                                    MVT::Other),
                                   Ops);
   }
   case AMDGPUISD::REGISTER_STORE: {
-    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
+    SDLoc DL(N);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
-      CurDAG->getTargetConstant(0, MVT::i32),
+      CurDAG->getTargetConstant(0, DL, MVT::i32),
      N->getOperand(0),
    };
-    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
+    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
                                   CurDAG->getVTList(MVT::Other),
                                   Ops);
   }
 
   case AMDGPUISD::BFE_I32:
   case AMDGPUISD::BFE_U32: {
-    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
       break;
 
     // There is a scalar version available, but unlike the vector version which
@@ -520,21 +567,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 
     bool Signed = Opc == AMDGPUISD::BFE_I32;
 
-    // Transformation function, pack the offset and width of a BFE into
-    // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
-    // source, bits [5:0] contain the offset and bits [22:16] the width.
-
     uint32_t OffsetVal = Offset->getZExtValue();
     uint32_t WidthVal = Width->getZExtValue();
 
-    uint32_t PackedVal = OffsetVal | WidthVal << 16;
-
-    SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
-    return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
-                                  SDLoc(N),
-                                  MVT::i32,
-                                  N->getOperand(0),
-                                  PackedOffsetWidth);
+    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
+                    N->getOperand(0), OffsetVal, WidthVal);
   }
 
   case AMDGPUISD::DIV_SCALE: {
@@ -548,6 +585,14 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   }
   case ISD::ADDRSPACECAST:
     return SelectAddrSpaceCast(N);
+  case ISD::AND:
+  case ISD::SRL:
+  case ISD::SRA:
+    if (N->getValueType(0) != MVT::i32 ||
+        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      break;
+
+    return SelectS_BFE(N);
   }
 
   return SelectCode(N);
@@ -604,13 +649,11 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
 }
 
 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
-  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
-    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
-        N->getMemoryVT().bitsLT(MVT::i32)) {
+  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+        N->getMemoryVT().bitsLT(MVT::i32))
       return true;
-    }
-  }
+
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
 }
@@ -681,7 +724,8 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                          SDValue& IntPtr) {
   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
-    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
+                                       true);
     return true;
   }
   return false;
@@ -691,7 +735,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
     SDValue& BaseReg, SDValue &Offset) {
   if (!isa<ConstantSDNode>(Addr)) {
     BaseReg = Addr;
-    Offset = CurDAG->getIntPtrConstant(0, true);
+    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
     return true;
   }
   return false;
@@ -706,7 +750,8 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
       && isInt<16>(IMMOffset->getZExtValue())) {
 
     Base = Addr.getOperand(0);
-    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
     return true;
   // If the pointer address is constant, we can move it to the offset field.
   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
@@ -714,30 +759,32 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                   SDLoc(CurDAG->getEntryNode()),
                                   AMDGPU::ZERO, MVT::i32);
-    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
    return true;
  }
 
  // Default case, no offset
  Base = Addr;
-  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
 }
 
 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                             SDValue &Offset) {
   ConstantSDNode *C;
+  SDLoc DL(Addr);
 
   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
-    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
     Base = Addr.getOperand(0);
-    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
   } else {
     Base = Addr;
-    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
   }
 
   return true;
@@ -750,8 +797,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
 
   bool IsAdd = (N->getOpcode() == ISD::ADD);
 
-  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
-  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
 
   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                        DL, MVT::i32, LHS, Sub0);
@@ -777,7 +824,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
                                        SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
 
   SDValue Args[5] = {
-    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
     SDValue(AddLo,0),
     Sub0,
     SDValue(AddHi,0),
@@ -808,12 +855,11 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
 
 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                          unsigned OffsetBits) const {
-  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
       (OffsetBits == 8 && !isUInt<8>(Offset)))
     return false;
 
-  if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
+  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
     return true;
 
   // On Southern Islands instruction with a negative base value and an offset
@@ -835,15 +881,17 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
     }
   }
 
+  SDLoc DL(Addr);
+
   // If we have a constant address, prefer to put the constant into the
   // offset. This can save moves to load the constant address since multiple
   // operations can share the zero base address register, and enables merging
   // into read2 / write2 instructions.
   if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     if (isUInt<16>(CAddr->getZExtValue())) {
-      SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
-                                                      SDLoc(Addr), MVT::i32, Zero);
+                                                      DL, MVT::i32, Zero);
       Base = SDValue(MovZero, 0);
       Offset = Addr;
       return true;
     }
@@ -852,13 +900,15 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
 
   // default case
   Base = Addr;
-  Offset = CurDAG->getTargetConstant(0, MVT::i16);
+  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
   return true;
 }
 
 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset0,
                                                    SDValue &Offset1) const {
+  SDLoc DL(Addr);
+
   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     SDValue N0 = Addr.getOperand(0);
     SDValue N1 = Addr.getOperand(1);
@@ -868,8 +918,8 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
     // (add n0, c0)
     if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
       Base = N0;
-      Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
-      Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
       return true;
     }
   }
@@ -880,21 +930,21 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
     assert(4 * DWordOffset0 == CAddr->getZExtValue());
 
     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
-      SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
-                                                      SDLoc(Addr), MVT::i32, Zero);
+                                                      DL, MVT::i32, Zero);
       Base = SDValue(MovZero, 0);
-      Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
-      Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
       return true;
     }
   }
 
   // default case
   Base = Addr;
-  Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
-  Offset1 = CurDAG->getTargetConstant(1, MVT::i8);
+  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
+  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
   return true;
 }
@@ -910,62 +960,70 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                      SDValue &TFE) const {
   SDLoc DL(Addr);
 
-  GLC = CurDAG->getTargetConstant(0, MVT::i1);
-  SLC = CurDAG->getTargetConstant(0, MVT::i1);
-  TFE = CurDAG->getTargetConstant(0, MVT::i1);
+  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
 
-  Idxen = CurDAG->getTargetConstant(0, MVT::i1);
-  Offen = CurDAG->getTargetConstant(0, MVT::i1);
-  Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
-  SOffset = CurDAG->getTargetConstant(0, MVT::i32);
+  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
 
   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     SDValue N0 = Addr.getOperand(0);
     SDValue N1 = Addr.getOperand(1);
     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
 
-    if (isLegalMUBUFImmOffset(C1)) {
-
-      if (N0.getOpcode() == ISD::ADD) {
-        // (add (add N2, N3), C1) -> addr64
-        SDValue N2 = N0.getOperand(0);
-        SDValue N3 = N0.getOperand(1);
-        Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
-        Ptr = N2;
-        VAddr = N3;
-        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
-        return;
-      }
+    if (N0.getOpcode() == ISD::ADD) {
+      // (add (add N2, N3), C1) -> addr64
+      SDValue N2 = N0.getOperand(0);
+      SDValue N3 = N0.getOperand(1);
+      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+      Ptr = N2;
+      VAddr = N3;
+    } else {
 
       // (add N0, C1) -> offset
-      VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
       Ptr = N0;
-      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+    }
+
+    if (isLegalMUBUFImmOffset(C1)) {
+      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
       return;
+    } else if (isUInt<32>(C1->getZExtValue())) {
+      // Illegal offset, store it in soffset.
+      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
+                        0);
+      return;
     }
   }
+
   if (Addr.getOpcode() == ISD::ADD) {
     // (add N0, N1) -> addr64
     SDValue N0 = Addr.getOperand(0);
     SDValue N1 = Addr.getOperand(1);
-    Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
     Ptr = N0;
     VAddr = N1;
-    Offset = CurDAG->getTargetConstant(0, MVT::i16);
+    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
     return;
   }
 
   // default case -> offset
-  VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
   Ptr = Addr;
-  Offset = CurDAG->getTargetConstant(0, MVT::i16);
+  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
 }
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
-                                           SDValue &VAddr,
-                                           SDValue &Offset) const {
-  SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;
+                                           SDValue &VAddr, SDValue &SOffset,
+                                           SDValue &Offset, SDValue &GLC,
+                                           SDValue &SLC, SDValue &TFE) const {
+  SDValue Ptr, Offen, Idxen, Addr64;
 
   SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
               GLC, SLC, TFE);
@@ -985,11 +1043,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
 }
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
-                                           SDValue &VAddr, SDValue &Offset,
-                                           SDValue &SLC) const {
-  SLC = CurDAG->getTargetConstant(0, MVT::i1);
+                                           SDValue &VAddr, SDValue &SOffset,
+                                           SDValue &Offset,
+                                           SDValue &SLC) const {
+  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
+  SDValue GLC, TFE;
 
-  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
+  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
 }
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
@@ -999,7 +1059,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
   SDLoc DL(Addr);
   MachineFunction &MF = CurDAG->getMachineFunction();
   const SIRegisterInfo *TRI =
-      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const SITargetLowering& Lowering =
     *static_cast<const SITargetLowering*>(getTargetLowering());
@@ -1017,11 +1077,11 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
       SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
 
     const SDValue RsrcOps[] = {
-        CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+        CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
        ScratchRsrcDword0,
-        CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+        CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
        ScratchRsrcDword1,
-        CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+        CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
    };
    SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                              MVT::v2i32, RsrcOps), 0);
@@ -1036,14 +1096,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
 
     if (isLegalMUBUFImmOffset(C1)) {
       VAddr = Addr.getOperand(0);
-      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
       return true;
     }
   }
 
   // (node)
   VAddr = Addr;
-  ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
   return true;
 }
 
@@ -1053,7 +1113,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &TFE) const {
   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
   const SIInstrInfo *TII =
-    static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
+    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
 
   SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
               GLC, SLC, TFE);
@@ -1087,7 +1147,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
   AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
   SDLoc DL(N);
 
-  assert(Subtarget.hasFlatAddressSpace() &&
+  assert(Subtarget->hasFlatAddressSpace() &&
          "addrspacecast only supported with flat address space!");
 
   assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
@@ -1116,7 +1176,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
                                      DL,
                                      DestVT,
                                      Src,
-                                     CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32));
+                                     CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
     }
 
@@ -1125,25 +1185,115 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
 
     // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs
-    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, MVT::i32);
+    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
+                                           MVT::i32);
 
    const SDValue Ops[] = {
      RC,
      Src,
-      CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
-      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
-                                     CurDAG->getConstant(0, MVT::i32)), 0),
-      CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
+      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };
 
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
-                                  SDLoc(N), N->getValueType(0), Ops);
+                                  DL, N->getValueType(0), Ops);
  }
 
  assert(SrcSize == 64 && DestSize == 64);
 
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
 }
 
+SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+                                     uint32_t Offset, uint32_t Width) {
+  // Transformation function, pack the offset and width of a BFE into
+  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
+  // source, bits [5:0] contain the offset and bits [22:16] the width.
+  uint32_t PackedVal = Offset | (Width << 16);
+  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
+
+  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
+}
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
+  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
+  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
+  // Predicate: 0 < b <= c < 32
+
+  const SDValue &Shl = N->getOperand(0);
+  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+  if (B && C) {
+    uint32_t BVal = B->getZExtValue();
+    uint32_t CVal = C->getZExtValue();
+
+    if (0 < BVal && BVal <= CVal && CVal < 32) {
+      bool Signed = N->getOpcode() == ISD::SRA;
+      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+      return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
+                      CVal - BVal, 32 - CVal);
+    }
+  }
+  return SelectCode(N);
+}
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
+  switch (N->getOpcode()) {
+  case ISD::AND:
+    if (N->getOperand(0).getOpcode() == ISD::SRL) {
+      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
+      // Predicate: isMask(mask)
+      const SDValue &Srl = N->getOperand(0);
+      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
+      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+      if (Shift && Mask) {
+        uint32_t ShiftVal = Shift->getZExtValue();
+        uint32_t MaskVal = Mask->getZExtValue();
+
+        if (isMask_32(MaskVal)) {
+          uint32_t WidthVal = countPopulation(MaskVal);
+
+          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
+                          ShiftVal, WidthVal);
+        }
+      }
+    }
+    break;
+  case ISD::SRL:
+    if (N->getOperand(0).getOpcode() == ISD::AND) {
+      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
+      // Predicate: isMask(mask >> b)
+      const SDValue &And = N->getOperand(0);
+      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
+      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
+
+      if (Shift && Mask) {
+        uint32_t ShiftVal = Shift->getZExtValue();
+        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
+
+        if (isMask_32(MaskVal)) {
+          uint32_t WidthVal = countPopulation(MaskVal);
+
+          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
+                          ShiftVal, WidthVal);
        }
      }
+    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
+      return SelectS_BFEFromShifts(N);
+    break;
+  case ISD::SRA:
+    if (N->getOperand(0).getOpcode() == ISD::SHL)
+      return SelectS_BFEFromShifts(N);
+    break;
+  }
+
+  return SelectCode(N);
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
@@ -1161,7 +1311,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
     Src = Src.getOperand(0);
   }
 
-  SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);
+  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 
   return true;
 }
@@ -1169,9 +1319,10 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods, SDValue &Clamp,
                                          SDValue &Omod) const {
+  SDLoc DL(In);
   // FIXME: Handle Clamp and Omod
-  Clamp = CurDAG->getTargetConstant(0, MVT::i32);
-  Omod = CurDAG->getTargetConstant(0, MVT::i32);
+  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
 
   return SelectVOP3Mods(In, Src, SrcMods);
 }
@@ -1180,7 +1331,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                               SDValue &SrcMods,
                                               SDValue &Omod) const {
   // FIXME: Handle Omod
-  Omod = CurDAG->getTargetConstant(0, MVT::i32);
+  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
 
   return SelectVOP3Mods(In, Src, SrcMods);
 }
@@ -1189,7 +1340,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                    SDValue &SrcMods,
                                                    SDValue &Clamp,
                                                    SDValue &Omod) const {
-  Clamp = Omod = CurDAG->getTargetConstant(0, MVT::i32);
+  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
 
   return SelectVOP3Mods(In, Src, SrcMods);
 }
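Editor's aside, not part of the patch: the BFE-related hunks above all hinge on one encoding, which a few lines of plain C++ make easy to check. The S_BFE_I32 / S_BFE_U32 second source packs the field offset into bits [5:0] and the field width into bits [22:16]; the patch centralizes that packing in the new getS_BFE() helper (removing the copy previously inlined in the BFE_I32/BFE_U32 case) and then reuses it from SelectS_BFE(), which recognizes shift/mask combinations that are really bitfield extracts. The standalone sketch below mirrors those two facts; packBFE, bfeU32, and the sample values are invented for illustration and appear nowhere in the patch.

// Standalone sketch of the S_BFE packing and the shift/mask patterns
// matched by the patch's SelectS_BFE(); illustrative names, not LLVM APIs.
#include <cassert>
#include <cstdint>

// Mirror of getS_BFE()'s packing: offset in bits [5:0], width in [22:16].
static uint32_t packBFE(uint32_t Offset, uint32_t Width) {
  assert(Offset < 64 && Width < 128 && "fields must fit bits [5:0]/[22:16]");
  return Offset | (Width << 16);
}

// Reference semantics of an unsigned bitfield extract, for 0 < Width < 32.
static uint32_t bfeU32(uint32_t Val, uint32_t Offset, uint32_t Width) {
  return (Val >> Offset) & ((1u << Width) - 1);
}

int main() {
  // Offset 4, width 8 packs to 0x00080004.
  assert(packBFE(4, 8) == 0x00080004u);

  uint32_t A = 0xDEADBEEF;
  // "(a srl b) & mask" with a contiguous low-bit mask is equivalent to
  // BFE_U32 a, b, popcount(mask).
  assert(((A >> 8) & 0xFFu) == bfeU32(A, 8, 8));
  // "(a & mask) srl b" qualifies when (mask >> b) is such a mask.
  assert(((A & 0xFF00u) >> 8) == bfeU32(A, 8, 8));
  return 0;
}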