Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 479
1 file changed, 299 insertions, 180 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 64c54cc..b33040b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -11,6 +11,8 @@
 /// \brief Defines an instruction selector for the AMDGPU target.
 //
 //===----------------------------------------------------------------------===//
+
+#include "AMDGPUDiagnosticInfoUnsupported.h"
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPUISelLowering.h" // For AMDGPUISD
 #include "AMDGPURegisterInfo.h"
@@ -20,9 +22,9 @@
 #include "SIISelLowering.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/Function.h"
@@ -40,12 +42,14 @@
   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
   // make the right decision when generating code for different targets.
   const AMDGPUSubtarget *Subtarget;
+
 public:
   AMDGPUDAGToDAGISel(TargetMachine &TM);
   virtual ~AMDGPUDAGToDAGISel();
   bool runOnMachineFunction(MachineFunction &MF) override;
   SDNode *Select(SDNode *N) override;
   const char *getPassName() const override;
+  void PreprocessISelDAG() override;
   void PostprocessISelDAG() override;
 
 private:
@@ -91,7 +95,7 @@ private:
   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                  SDValue &Offset1) const;
-  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                    SDValue &TFE) const;
@@ -108,6 +112,16 @@ private:
                          SDValue &TFE) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset, SDValue &GLC) const;
+  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
+                        bool &Imm) const;
+  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
+                  bool &Imm) const;
+  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
+  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
+  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
   SDNode *SelectAddrSpaceCast(SDNode *N);
   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
@@ -273,6 +287,23 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
   return N;
 }
 
+static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
+  switch (NumVectorElts) {
+  case 1:
+    return AMDGPU::SReg_32RegClassID;
+  case 2:
+    return AMDGPU::SReg_64RegClassID;
+  case 4:
+    return AMDGPU::SReg_128RegClassID;
+  case 8:
+    return AMDGPU::SReg_256RegClassID;
+  case 16:
+    return AMDGPU::SReg_512RegClassID;
+  }
+
+  llvm_unreachable("invalid vector size");
+}
+
 SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   unsigned int Opc = N->getOpcode();
   if (N->isMachineOpcode()) {
@@ -306,38 +337,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     EVT EltVT = VT.getVectorElementType();
     assert(EltVT.bitsEq(MVT::i32));
     if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-      bool UseVReg = true;
-      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
-           U != E; ++U) {
-        if (!U->isMachineOpcode()) {
-          continue;
-        }
-        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
-        if (!RC) {
-          continue;
-        }
-        if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
-          UseVReg = false;
-        }
-      }
-      switch(NumVectorElts) {
-      case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
-                                     AMDGPU::SReg_32RegClassID;
-        break;
-      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
-                                     AMDGPU::SReg_64RegClassID;
-        break;
-      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
-                                     AMDGPU::SReg_128RegClassID;
-        break;
-      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
-                                     AMDGPU::SReg_256RegClassID;
-        break;
-      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
-                                      AMDGPU::SReg_512RegClassID;
-        break;
-      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
-      }
+      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
     } else {
       // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
       // that adds a 128 bits reg copy when going through TwoAddressInstructions
@@ -455,98 +455,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                   N->getValueType(0), Ops);
   }
-
-  case ISD::LOAD: {
-    LoadSDNode *LD = cast<LoadSDNode>(N);
-    SDLoc SL(N);
-    EVT VT = N->getValueType(0);
-
-    if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
-      N = glueCopyToM0(N);
-      break;
-    }
-
-    // To simplify the TableGen patters, we replace all i64 loads with
-    // v2i32 loads.  Alternatively, we could promote i64 loads to v2i32
-    // during DAG legalization, however, so places (ExpandUnalignedLoad)
-    // in the DAG legalizer assume that if i64 is legal, so doing this
-    // promotion early can cause problems.
-
-    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
-                                      LD->getBasePtr(), LD->getMemOperand());
-    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
-                                      MVT::i64, NewLoad);
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
-    SDNode *Load = glueCopyToM0(NewLoad.getNode());
-    SelectCode(Load);
-    N = BitCast.getNode();
-    break;
-  }
-
+  case ISD::LOAD:
   case ISD::STORE: {
-    // Handle i64 stores here for the same reason mentioned above for loads.
-    StoreSDNode *ST = cast<StoreSDNode>(N);
-    SDValue Value = ST->getValue();
-    if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
-
-      SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
-                                         MVT::v2i32, Value);
-      SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
-                                          ST->getBasePtr(), ST->getMemOperand());
-
-      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
-
-      if (NewValue.getOpcode() == ISD::BITCAST) {
-        Select(NewStore.getNode());
-        return SelectCode(NewValue.getNode());
-      }
-
-      // getNode() may fold the bitcast if its input was another bitcast. If that
-      // happens we should only select the new store.
-      N = NewStore.getNode();
-    }
-
     N = glueCopyToM0(N);
     break;
   }
 
-  case AMDGPUISD::REGISTER_LOAD: {
-    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
-      break;
-    SDValue Addr, Offset;
-
-    SDLoc DL(N);
-    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
-    const SDValue Ops[] = {
-      Addr,
-      Offset,
-      CurDAG->getTargetConstant(0, DL, MVT::i32),
-      N->getOperand(0),
-    };
-    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
-                                  CurDAG->getVTList(MVT::i32, MVT::i64,
-                                                    MVT::Other),
-                                  Ops);
-  }
-  case AMDGPUISD::REGISTER_STORE: {
-    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
-      break;
-    SDValue Addr, Offset;
-    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
-    SDLoc DL(N);
-    const SDValue Ops[] = {
-      N->getOperand(1),
-      Addr,
-      Offset,
-      CurDAG->getTargetConstant(0, DL, MVT::i32),
-      N->getOperand(0),
-    };
-    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
-                                  CurDAG->getVTList(MVT::Other),
-                                  Ops);
-  }
-
   case AMDGPUISD::BFE_I32:
   case AMDGPUISD::BFE_U32: {
     if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
@@ -575,7 +489,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 
     return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                     N->getOperand(0), OffsetVal, WidthVal);
-
   }
   case AMDGPUISD::DIV_SCALE: {
     return SelectDIV_SCALE(N);
@@ -601,7 +514,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   return SelectCode(N);
 }
 
-
 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
   assert(AS != 0 && "Use checkPrivateAddress instead.");
   if (!Ptr)
@@ -681,7 +593,7 @@ bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
   if (checkPrivateAddress(N->getMemOperand())) {
     if (MMO) {
       const PseudoSourceValue *PSV = MMO->getPseudoValue();
-      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+      if (PSV && PSV->isConstantPool()) {
         return true;
       }
     }
@@ -847,7 +759,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
   unsigned Opc
     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
 
-  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
+  // omod
   SDValue Ops[8];
 
   SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
@@ -883,15 +796,39 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
       Offset = N1;
       return true;
     }
-  }
+  } else if (Addr.getOpcode() == ISD::SUB) {
+    // sub C, x -> add (sub 0, x), C
+    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
+      int64_t ByteOffset = C->getSExtValue();
+      if (isUInt<16>(ByteOffset)) {
+        SDLoc DL(Addr);
+        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+        // XXX - This is kind of hacky. Create a dummy sub node so we can check
+        // the known bits in isDSOffsetLegal. We need to emit the selected node
+        // here, so this is thrown away.
+        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
+                                      Zero, Addr.getOperand(1));
+
+        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
+          MachineSDNode *MachineSub
+            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
+                                     Zero, Addr.getOperand(1));
+
+          Base = SDValue(MachineSub, 0);
+          Offset = Addr.getOperand(0);
+          return true;
+        }
+      }
+    }
+  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+    // If we have a constant address, prefer to put the constant into the
+    // offset. This can save moves to load the constant address since multiple
+    // operations can share the zero base address register, and enables merging
+    // into read2 / write2 instructions.
 
-  SDLoc DL(Addr);
+    SDLoc DL(Addr);
 
-  // If we have a constant address, prefer to put the constant into the
-  // offset. This can save moves to load the constant address since multiple
-  // operations can share the zero base address register, and enables merging
-  // into read2 / write2 instructions.
-  if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     if (isUInt<16>(CAddr->getZExtValue())) {
       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
@@ -904,10 +841,11 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
 
   // default case
   Base = Addr;
-  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
   return true;
 }
 
+// TODO: If offset is too big, put low 16-bit into offset.
 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset0,
                                                    SDValue &Offset1) const {
@@ -926,9 +864,35 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
     Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     return true;
   }
-  }
-
-  if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+  } else if (Addr.getOpcode() == ISD::SUB) {
+    // sub C, x -> add (sub 0, x), C
+    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
+      unsigned DWordOffset0 = C->getZExtValue() / 4;
+      unsigned DWordOffset1 = DWordOffset0 + 1;
+
+      if (isUInt<8>(DWordOffset0)) {
+        SDLoc DL(Addr);
+        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+        // XXX - This is kind of hacky. Create a dummy sub node so we can check
+        // the known bits in isDSOffsetLegal. We need to emit the selected node
+        // here, so this is thrown away.
+        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
+                                      Zero, Addr.getOperand(1));
+
+        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
+          MachineSDNode *MachineSub
+            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
+                                     Zero, Addr.getOperand(1));
+
+          Base = SDValue(MachineSub, 0);
+          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+          return true;
+        }
+      }
+    }
+  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
     unsigned DWordOffset1 = DWordOffset0 + 1;
     assert(4 * DWordOffset0 == CAddr->getZExtValue());
@@ -956,12 +920,16 @@ static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
   return isUInt<12>(Imm->getZExtValue());
 }
 
-void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
+bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                      SDValue &VAddr, SDValue &SOffset,
                                      SDValue &Offset, SDValue &Offen,
                                      SDValue &Idxen, SDValue &Addr64,
                                      SDValue &GLC, SDValue &SLC,
                                      SDValue &TFE) const {
+  // Subtarget prefers to use flat instruction
+  if (Subtarget->useFlatForGlobal())
+    return false;
+
   SDLoc DL(Addr);
 
   GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -994,14 +962,14 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
 
     if (isLegalMUBUFImmOffset(C1)) {
       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
-      return;
+      return true;
     } else if (isUInt<32>(C1->getZExtValue())) {
       // Illegal offset, store it in soffset.
       Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
       SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                    CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                         0);
-      return;
+      return true;
     }
   }
 
@@ -1013,7 +981,7 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
     Ptr = N0;
     VAddr = N1;
     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
-    return;
+    return true;
   }
 
   // default case -> offset
@@ -1021,6 +989,7 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
   Ptr = Addr;
   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
 
+  return true;
 }
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
@@ -1033,8 +1002,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
     return false;
 
-  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-              GLC, SLC, TFE);
+  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+                   GLC, SLC, TFE))
+    return false;
 
   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
   if (C->getSExtValue()) {
@@ -1052,8 +1022,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                            SDValue &VAddr, SDValue &SOffset,
-                                          SDValue &Offset,
-                                          SDValue &SLC) const {
+                                           SDValue &Offset,
+                                           SDValue &SLC) const {
   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
 
   SDValue GLC, TFE;
@@ -1066,36 +1036,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
 
   SDLoc DL(Addr);
   MachineFunction &MF = CurDAG->getMachineFunction();
-  const SIRegisterInfo *TRI =
-      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SITargetLowering& Lowering =
-    *static_cast<const SITargetLowering*>(getTargetLowering());
-
-  unsigned ScratchOffsetReg =
-      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
-  Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
-                                ScratchOffsetReg, MVT::i32);
-  SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
-  SDValue ScratchRsrcDword0 =
-      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
 
-  SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
-  SDValue ScratchRsrcDword1 =
-      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
-
-  const SDValue RsrcOps[] = {
-      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
-      ScratchRsrcDword0,
-      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
-      ScratchRsrcDword1,
-      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
-  };
-  SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
-                                              MVT::v2i32, RsrcOps), 0);
-  Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
-  SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
-      MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
+  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
+  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
 
   // (add n0, c1)
   if (CurDAG->isBaseWithConstantOffset(Addr)) {
@@ -1126,8 +1070,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
   const SIInstrInfo *TII =
     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
 
-  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-              GLC, SLC, TFE);
+  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+                   GLC, SLC, TFE))
+    return false;
 
   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
@@ -1153,18 +1098,134 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
 }
 
+///
+/// \param EncodedOffset This is the immediate value that will be encoded
+/// directly into the instruction. On SI/CI the \p EncodedOffset
+/// will be in units of dwords and on VI+ it will be units of bytes.
+static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
+                                 int64_t EncodedOffset) {
+  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
+    isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
+                                          SDValue &Offset, bool &Imm) const {
+
+  // FIXME: Handle non-constant offsets.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
+  if (!C)
+    return false;
+
+  SDLoc SL(ByteOffsetNode);
+  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
+  int64_t ByteOffset = C->getSExtValue();
+  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
+    ByteOffset >> 2 : ByteOffset;
+
+  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
+    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
+    Imm = true;
+    return true;
+  }
+
+  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
+    return false;
+
+  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
+    // 32-bit Immediates are supported on Sea Islands.
+    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
+  } else {
+    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
+    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
+                                            C32Bit), 0);
+  }
+  Imm = false;
+  return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
+                                    SDValue &Offset, bool &Imm) const {
+
+  SDLoc SL(Addr);
+  if (CurDAG->isBaseWithConstantOffset(Addr)) {
+    SDValue N0 = Addr.getOperand(0);
+    SDValue N1 = Addr.getOperand(1);
+
+    if (SelectSMRDOffset(N1, Offset, Imm)) {
+      SBase = N0;
+      return true;
+    }
+  }
+  SBase = Addr;
+  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+  Imm = true;
+  return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
+                                       SDValue &Offset) const {
+  bool Imm;
+  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
+                                         SDValue &Offset) const {
+
+  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
+    return false;
+
+  bool Imm;
+  if (!SelectSMRD(Addr, SBase, Offset, Imm))
+    return false;
+
+  return !Imm && isa<ConstantSDNode>(Offset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
+                                        SDValue &Offset) const {
+  bool Imm;
+  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
+         !isa<ConstantSDNode>(Offset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
+                                             SDValue &Offset) const {
+  bool Imm;
+  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
+                                               SDValue &Offset) const {
+  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
+    return false;
+
+  bool Imm;
+  if (!SelectSMRDOffset(Addr, Offset, Imm))
+    return false;
+
+  return !Imm && isa<ConstantSDNode>(Offset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
+                                              SDValue &Offset) const {
+  bool Imm;
+  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
+         !isa<ConstantSDNode>(Offset);
+}
+
 // FIXME: This is incorrect and only enough to be able to compile.
 SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
   AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
   SDLoc DL(N);
 
+  const MachineFunction &MF = CurDAG->getMachineFunction();
+  DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(),
+                                           "addrspacecast not implemented");
+  CurDAG->getContext()->diagnose(NotImplemented);
+
   assert(Subtarget->hasFlatAddressSpace() &&
          "addrspacecast only supported with flat address space!");
 
-  assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
-          ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
-         "Cannot cast address space to / from constant address!");
-
   assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
           ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
          "Can only cast to / from flat address space!");
@@ -1190,7 +1251,6 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
                                      CurDAG->getTargetConstant(AMDGPU::sub0, DL,
                                                                MVT::i32));
   }
 
-
   if (DestSize > SrcSize) {
     assert(SrcSize == 32 && DestSize == 64);
@@ -1371,6 +1431,65 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
   return SelectVOP3Mods(In, Src, SrcMods);
 }
 
+void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
+  bool Modified = false;
+
+  // XXX - Other targets seem to be able to do this without a worklist.
+  SmallVector<LoadSDNode *, 8> LoadsToReplace;
+  SmallVector<StoreSDNode *, 8> StoresToReplace;
+
+  for (SDNode &Node : CurDAG->allnodes()) {
+    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
+      EVT VT = LD->getValueType(0);
+      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
+        continue;
+
+      // To simplify the TableGen patters, we replace all i64 loads with v2i32
+      // loads.  Alternatively, we could promote i64 loads to v2i32 during DAG
+      // legalization, however, so places (ExpandUnalignedLoad) in the DAG
+      // legalizer assume that if i64 is legal, so doing this promotion early
+      // can cause problems.
+      LoadsToReplace.push_back(LD);
+    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
+      // Handle i64 stores here for the same reason mentioned above for loads.
+      SDValue Value = ST->getValue();
+      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
+        continue;
+      StoresToReplace.push_back(ST);
+    }
+  }
+
+  for (LoadSDNode *LD : LoadsToReplace) {
+    SDLoc SL(LD);
+
+    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
+                                      LD->getBasePtr(), LD->getMemOperand());
+    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
+                                      MVT::i64, NewLoad);
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
+    Modified = true;
+  }
+
+  for (StoreSDNode *ST : StoresToReplace) {
+    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
+                                       MVT::v2i32, ST->getValue());
+    const SDValue StoreOps[] = {
+      ST->getChain(),
+      NewValue,
+      ST->getBasePtr(),
+      ST->getOffset()
+    };
+
+    CurDAG->UpdateNodeOperands(ST, StoreOps);
+    Modified = true;
+  }
+
+  // XXX - Is this necessary?
+  if (Modified)
+    CurDAG->RemoveDeadNodes();
+}
+
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
   const AMDGPUTargetLowering& Lowering =
     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
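
The SelectDS1Addr1Offset and SelectDS64Bit4ByteAligned hunks above fold an address of the form C - x into a DS instruction by materializing the base as 0 - x (a V_SUB_I32_e32) and carrying C in the immediate offset field, since C - x == (0 - x) + C in wrap-around 32-bit arithmetic. A minimal standalone sketch of that identity with arbitrary example values (plain C++, not the SelectionDAG API):

    #include <cassert>
    #include <cstdint>

    // The DS hunks rewrite address (C - x) as base = (0 - x), offset = C.
    // The sub feeds the VGPR base; C lands in the 16-bit DS offset field
    // (or the two 8-bit fields for read2/write2).
    int main() {
      uint32_t C = 48;            // would need isUInt<16>(C) to be foldable
      uint32_t x = 0x12345678u;
      uint32_t base = 0u - x;     // the materialized (sub 0, x)
      assert(base + C == C - x);  // both wrap identically mod 2^32
      return 0;
    }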
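For MUBUF addressing, the patch keeps immediate offsets that fit the 12-bit field (isLegalMUBUFImmOffset) and spills any larger 32-bit offset into SOffset via S_MOV_B32; SelectMUBUF now also returns false up front when the subtarget prefers flat instructions. A standalone sketch of the 12-bit boundary (hypothetical helper name mirroring the patch's isUInt<12> check):

    #include <cassert>
    #include <cstdint>

    // A 12-bit unsigned immediate stays in the instruction's offset field;
    // larger 32-bit values are instead moved into SOffset with S_MOV_B32.
    static bool isLegalMUBUFImmOffset12(uint64_t V) {
      return V < (1u << 12);                  // 0 .. 4095 fits the field
    }

    int main() {
      assert(isLegalMUBUFImmOffset12(4095));  // encoded directly
      assert(!isLegalMUBUFImmOffset12(4096)); // spilled to SOffset
      return 0;
    }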
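The new SMRD selectors encode the offset differently per generation: SI/CI take the immediate in dword units (ByteOffset >> 2) with an 8-bit field, VI+ take byte units with a 20-bit field, and Sea Islands can additionally fall back to a 32-bit literal; anything else is materialized with S_MOV_B32. A standalone sketch of those legality rules with worked values (hypothetical helpers restating isLegalSMRDImmOffset / SelectSMRDOffset, not the LLVM API):

    #include <cassert>
    #include <cstdint>

    enum Generation { SOUTHERN_ISLANDS, SEA_ISLANDS, VOLCANIC_ISLANDS };

    static bool fitsUnsigned(int64_t V, unsigned Bits) {
      return V >= 0 && V < (int64_t(1) << Bits);
    }

    // SI/CI encode the offset in dwords; VI+ encodes it in bytes.
    static int64_t encodeSMRDOffset(Generation Gen, int64_t ByteOffset) {
      return Gen < VOLCANIC_ISLANDS ? ByteOffset >> 2 : ByteOffset;
    }

    // SI/CI have an 8-bit immediate field, VI+ a 20-bit one.
    static bool isLegalImm(Generation Gen, int64_t Encoded) {
      return Gen < VOLCANIC_ISLANDS ? fitsUnsigned(Encoded, 8)
                                    : fitsUnsigned(Encoded, 20);
    }

    int main() {
      // 1020 bytes -> dword offset 255: fits the SI/CI 8-bit field.
      assert(isLegalImm(SEA_ISLANDS, encodeSMRDOffset(SEA_ISLANDS, 1020)));
      // 1024 bytes -> dword offset 256: too big for the 8-bit field, so SI
      // fails; CI would use its 32-bit literal form instead, while VI
      // encodes 1024 bytes directly in its 20-bit field.
      assert(!isLegalImm(SOUTHERN_ISLANDS,
                         encodeSMRDOffset(SOUTHERN_ISLANDS, 1024)));
      assert(isLegalImm(VOLCANIC_ISLANDS,
                        encodeSMRDOffset(VOLCANIC_ISLANDS, 1024)));
      return 0;
    }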
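Finally, PreprocessISelDAG now performs the i64 -> v2i32 rewrite that Select() used to do inline: each non-extending i64 load becomes a v2i32 load whose result is bitcast back to i64, and i64 store operands are bitcast to v2i32, so the TableGen patterns only need to match the vector forms. A semantic sketch of why the bitcast round-trip is value-preserving (plain C++ using memcpy as the bitcast; assumes a little-endian host to match the target):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      const uint64_t in_memory = 0x1122334455667788ull;  // the i64 in memory

      uint32_t v2i32[2];                                 // the v2i32 load
      std::memcpy(v2i32, &in_memory, sizeof v2i32);

      uint64_t bitcast;                                  // ISD::BITCAST to i64
      std::memcpy(&bitcast, v2i32, sizeof bitcast);

      assert(bitcast == in_memory);  // users of the load see the same value
      return 0;
    }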