Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU')
11 files changed, 103 insertions, 38 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 709d753..0a5309b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -264,6 +264,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   for (const MachineBasicBlock &MBB : MF) {
     for (const MachineInstr &MI : MBB) {
       // TODO: CodeSize should account for multiple functions.
+
+      // TODO: Should we count size of debug info?
+      if (MI.isDebugValue())
+        continue;
+
+      // FIXME: This is reporting 0 for many instructions.
       CodeSize += MI.getDesc().Size;
 
       unsigned numOperands = MI.getNumOperands();
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 4a65bfc..57b7a73 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -134,13 +134,17 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
 //
 // TODO: Check isTriviallyVectorizable for calls and handle other
 // instructions.
-static bool canVectorizeInst(Instruction *Inst) {
+static bool canVectorizeInst(Instruction *Inst, User *User) {
   switch (Inst->getOpcode()) {
   case Instruction::Load:
-  case Instruction::Store:
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast:
     return true;
+  case Instruction::Store: {
+    // Must be the stored pointer operand, not a stored value.
+    StoreInst *SI = cast<StoreInst>(Inst);
+    return SI->getPointerOperand() == User;
+  }
   default:
     return false;
   }
@@ -166,7 +170,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
   for (User *AllocaUser : Alloca->users()) {
     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
     if (!GEP) {
-      if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
+      if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
         return false;
 
       WorkList.push_back(AllocaUser);
@@ -184,7 +188,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
     GEPVectorIdx[GEP] = Index;
 
     for (User *GEPUser : AllocaUser->users()) {
-      if (!canVectorizeInst(cast<Instruction>(GEPUser)))
+      if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
         return false;
 
       WorkList.push_back(GEPUser);
@@ -240,7 +244,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
   for (User *User : Val->users()) {
     if (std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
       continue;
-    if (isa<CallInst>(User)) {
+    if (CallInst *CI = dyn_cast<CallInst>(User)) {
+      // TODO: We might be able to handle some cases where the callee is a
+      // constantexpr bitcast of a function.
+      if (!CI->getCalledFunction())
+        return false;
+
       WorkList.push_back(User);
       continue;
     }
@@ -250,6 +259,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
     if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
       return false;
 
+    if (StoreInst *SI = dyn_cast_or_null<StoreInst>(UseInst)) {
+      // Reject if the stored value is not the pointer operand.
+      if (SI->getPointerOperand() != Val)
+        return false;
+    }
+
     if (!User->getType()->isPointerTy())
       continue;
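The new Store cases in both functions accept a store only when the promoted pointer is the address being written to; a pointer that is itself the stored value escapes into memory, so promotion would be unsafe. A minimal standalone sketch of that test (hypothetical helper name, assuming the LLVM C++ API):

#include "llvm/IR/Instructions.h"

// Hypothetical helper mirroring the check above: Val may only appear as the
// address operand ('store %v, Val'); if it is the value operand
// ('store Val, %p'), the pointer escapes and cannot be promoted.
static bool usesValOnlyAsStoreAddress(const llvm::StoreInst *SI,
                                      const llvm::Value *Val) {
  return SI->getPointerOperand() == Val;
}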
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td
index 835a146..ba0490a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td
@@ -14,8 +14,7 @@
 let Namespace = "AMDGPU" in {
 
 foreach Index = 0-15 in {
-  // Indices are used in a variety of ways here, so don't set a size/offset.
-  def sub#Index : SubRegIndex<-1, -1>;
+  def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
 }
 
 def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 468563c..4434d9b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -71,12 +71,26 @@ void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm,
   }
 }
 
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+  switch (Kind) {
+  case FK_Data_1:
+    return 1;
+  case FK_Data_2:
+    return 2;
+  case FK_Data_4:
+    return 4;
+  case FK_Data_8:
+    return 8;
+  default:
+    llvm_unreachable("Unknown fixup kind!");
+  }
+}
+
 void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                                   unsigned DataSize, uint64_t Value,
                                   bool IsPCRel) const {
 
   switch ((unsigned)Fixup.getKind()) {
-    default: llvm_unreachable("Unknown fixup kind");
     case AMDGPU::fixup_si_sopp_br: {
       uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
       *Dst = (Value - 4) / 4;
@@ -96,6 +110,24 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
       *Dst = Value + 4;
       break;
     }
+    default: {
+      // FIXME: Copied from AArch64
+      unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+      if (!Value)
+        return; // Doesn't change encoding.
+      MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+
+      // Shift the value into position.
+      Value <<= Info.TargetOffset;
+
+      unsigned Offset = Fixup.getOffset();
+      assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+      // For each byte of the fragment that the fixup touches, mask in the
+      // bits from the fixup value.
+      for (unsigned i = 0; i != NumBytes; ++i)
+        Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+    }
   }
 }
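The default case added to applyFixup() is self-contained enough to lift out; a sketch of the same little-endian OR-in loop, assuming only standard headers:

#include <cassert>
#include <cstdint>

// Sketch of the generic data-fixup path: skip zero values (they don't change
// the encoding), then OR each byte of the already-shifted value into the
// fragment, least-significant byte first.
static void orInFixup(char *Data, unsigned DataSize, unsigned Offset,
                      unsigned NumBytes, uint64_t Value) {
  if (!Value)
    return;
  assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
  for (unsigned i = 0; i != NumBytes; ++i)
    Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
}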
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 099b0b1..c2db9ff 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -157,6 +157,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
   setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
+  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
   setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
 
   setOperationAction(ISD::LOAD, MVT::i1, Custom);
@@ -2252,10 +2253,8 @@ MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
                                                   SDValue Ptr) const {
   const SIInstrInfo *TII =
       static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
-                  0xffffffff; // Size
 
-  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
+  return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23());
 }
 
 SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1891061..cfd2c42 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2778,3 +2778,16 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
 
   return RsrcDataFormat;
 }
+
+uint64_t SIInstrInfo::getScratchRsrcWords23() const {
+  uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
+                    AMDGPU::RSRC_TID_ENABLE |
+                    0xffffffff; // Size;
+
+  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
+  // Clear them unless we want a huge stride.
+  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
+
+  return Rsrc23;
+}
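A note on getScratchRsrcWords23(): words 2-3 of a buffer resource descriptor pack the 32-bit size in the low word and flags in the high word, and on Volcanic Islands the DATA_FORMAT bits are reinterpreted as the stride field once TID_ENABLE is set. A standalone sketch of the masking logic with assumed constant values (the real encodings are the AMDGPU::RSRC_* enums, not these):

#include <cstdint>

// Assumed stand-ins for AMDGPU::RSRC_DATA_FORMAT / AMDGPU::RSRC_TID_ENABLE;
// only the masking logic is meant to be accurate here.
static const uint64_t kRsrcDataFormat = UINT64_C(0xf) << 44;
static const uint64_t kRsrcTidEnable = UINT64_C(1) << 55;

static uint64_t scratchRsrcWords23(bool IsVolcanicIslands) {
  // Word 2 (low 32 bits): buffer size. Word 3 (high 32 bits): flags.
  uint64_t Rsrc23 = kRsrcDataFormat | kRsrcTidEnable | 0xffffffff;
  // With TID_ENABLE set, the DATA_FORMAT bits are read as a stride, so
  // clear them to avoid an unintentionally huge stride.
  if (IsVolcanicIslands)
    Rsrc23 &= ~kRsrcDataFormat;
  return Rsrc23;
}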
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 015ea12..5053786 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -353,7 +353,7 @@ public:
   }
 
   uint64_t getDefaultRsrcDataFormat() const;
-
+  uint64_t getScratchRsrcWords23() const;
 };
 
 namespace AMDGPU {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index f78ffd7..e0eeea9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1548,6 +1548,12 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m <
 // These instructions only exist on SI and CI
 let SubtargetPredicate = isSICI in {
 
+let isCommutable = 1 in {
+defm V_MAC_LEGACY_F32 : VOP2InstSI <vop2<0x6>, "v_mac_legacy_f32",
+  VOP_F32_F32_F32
+>;
+} // End isCommutable = 1
+
 defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
   VOP_F32_F32_F32, AMDGPUfmin_legacy
 >;
@@ -1562,12 +1568,6 @@ defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
 } // End isCommutable = 1
 } // End let SubtargetPredicate = SICI
 
-let isCommutable = 1 in {
-defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
-  VOP_F32_F32_F32
->;
-} // End isCommutable = 1
-
 defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
   VOP_I32_I32_I32
 >;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
index 0a7f684..2cd600d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
@@ -135,8 +135,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
       unsigned ScratchRsrcReg =
           RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
 
-      uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
-                      0xffffffff; // Size
+      uint64_t Rsrc23 = TII->getScratchRsrcWords23();
 
       unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
       unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
@@ -152,11 +151,11 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
           .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 
       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
-              .addImm(Rsrc & 0xffffffff)
+              .addImm(Rsrc23 & 0xffffffff)
              .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
-              .addImm(Rsrc >> 32)
+              .addImm(Rsrc23 >> 32)
              .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 
       // Scratch Offset
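SIPrepareScratchRegs then materializes the 64-bit value with two S_MOV_B32s, one per 32-bit word; the split itself is just:

#include <cstdint>

// Low word feeds Rsrc2 (sub2, the size); high word feeds Rsrc3 (sub3, flags).
static uint32_t rsrcWord2(uint64_t Rsrc23) { return uint32_t(Rsrc23 & 0xffffffff); }
static uint32_t rsrcWord3(uint64_t Rsrc23) { return uint32_t(Rsrc23 >> 32); }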
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 54c4d54..e9e8412 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -26,23 +26,25 @@ using namespace llvm;
 
 SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
 
-BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
-  BitVector Reserved(getNumRegs());
-  Reserved.set(AMDGPU::EXEC);
+void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
+  MCRegAliasIterator R(Reg, this, true);
 
-  // EXEC_LO and EXEC_HI could be allocated and used as regular register,
-  // but this seems likely to result in bugs, so I'm marking them as reserved.
-  Reserved.set(AMDGPU::EXEC_LO);
-  Reserved.set(AMDGPU::EXEC_HI);
+  for (; R.isValid(); ++R)
+    Reserved.set(*R);
+}
 
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  BitVector Reserved(getNumRegs());
   Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
-  Reserved.set(AMDGPU::FLAT_SCR);
-  Reserved.set(AMDGPU::FLAT_SCR_LO);
-  Reserved.set(AMDGPU::FLAT_SCR_HI);
+
+  // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
+  // this seems likely to result in bugs, so I'm marking them as reserved.
+  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
+  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
 
   // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
-  Reserved.set(AMDGPU::VGPR255);
-  Reserved.set(AMDGPU::VGPR254);
+  reserveRegisterTuples(Reserved, AMDGPU::VGPR254);
+  reserveRegisterTuples(Reserved, AMDGPU::VGPR255);
 
   // Tonga and Iceland can only allocate a fixed number of SGPRs due
   // to a hw bug.
@@ -54,10 +56,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 
     for (unsigned i = Limit; i < NumSGPRs; ++i) {
       unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
-      MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
-
-      for (; R.isValid(); ++R)
-        Reserved.set(*R);
+      reserveRegisterTuples(Reserved, Reg);
     }
   }
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index bfdb67c..7da6de2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -23,7 +23,10 @@ namespace llvm {
 
 struct SIRegisterInfo : public AMDGPURegisterInfo {
+private:
+  void reserveRegisterTuples(BitVector &, unsigned Reg) const;
+public:
   SIRegisterInfo();
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
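The reserveRegisterTuples() helper works because MCRegAliasIterator visits every register that overlaps the given one. A standalone sketch of the same walk (assuming the LLVM MC API), which is what makes reserving EXEC also cover EXEC_LO and EXEC_HI:

#include "llvm/ADT/BitVector.h"
#include "llvm/MC/MCRegisterInfo.h"

// Mark Reg and every aliasing register (sub-registers, super-registers, and
// Reg itself via IncludeSelf) as reserved, so a tuple like EXEC cannot leak
// its halves EXEC_LO/EXEC_HI back into the allocatable set.
static void reserveTuple(llvm::BitVector &Reserved, unsigned Reg,
                         const llvm::MCRegisterInfo &MRI) {
  for (llvm::MCRegAliasIterator R(Reg, &MRI, /*IncludeSelf=*/true);
       R.isValid(); ++R)
    Reserved.set(*R);
}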