diff options
author | dim <dim@FreeBSD.org> | 2014-11-24 17:02:24 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2014-11-24 17:02:24 +0000 |
commit | 2c8643c6396b0a3db33430cf9380e70bbb9efce0 (patch) | |
tree | 4df130b28021d86e13bf4565ef58c1c5a5e093b4 /contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp | |
parent | 678318cd20f7db4e6c6b85d83fe00fa327b04fca (diff) | |
parent | e27feadae0885aa074df58ebfda2e7a7f7a7d590 (diff) | |
download | FreeBSD-src-2c8643c6396b0a3db33430cf9380e70bbb9efce0.zip FreeBSD-src-2c8643c6396b0a3db33430cf9380e70bbb9efce0.tar.gz |
Merge llvm 3.5.0 release from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Diffstat (limited to 'contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp | 173 |
1 files changed, 90 insertions, 83 deletions
diff --git a/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp index ef867d3..75b5a5e 100644 --- a/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp +++ b/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp @@ -55,6 +55,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" using namespace llvm; @@ -66,8 +67,8 @@ private: static const unsigned SkipThreshold = 12; static char ID; - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; + const SIRegisterInfo *TRI; + const SIInstrInfo *TII; bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); @@ -85,17 +86,18 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); + void InitM0ForLDS(MachineBasicBlock::iterator MI); void LoadM0(MachineInstr &MI, MachineInstr *MovRel); void IndirectSrc(MachineInstr &MI); void IndirectDst(MachineInstr &MI); public: SILowerControlFlowPass(TargetMachine &tm) : - MachineFunctionPass(ID), TRI(0), TII(0) { } + MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const { + const char *getPassName() const override { return "SI Lower control flow instructions"; } @@ -109,23 +111,6 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { return new SILowerControlFlowPass(tm); } -static bool isDS(unsigned Opcode) { - switch(Opcode) { - default: return false; - case AMDGPU::DS_ADD_U32_RTN: - case AMDGPU::DS_SUB_U32_RTN: - case AMDGPU::DS_WRITE_B32: - case AMDGPU::DS_WRITE_B8: - case AMDGPU::DS_WRITE_B16: - case AMDGPU::DS_READ_B32: - case AMDGPU::DS_READ_I8: - case AMDGPU::DS_READ_U8: - case AMDGPU::DS_READ_I16: - case AMDGPU::DS_READ_U16: - return true; - } -} - bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To) { @@ -162,7 +147,7 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); - if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType != + if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getShaderType() != ShaderType::PIXEL || !shouldSkip(&MBB, &MBB.getParent()->back())) return; @@ -302,33 +287,50 @@ void SILowerControlFlowPass::EndCf(MachineInstr &MI) { } void SILowerControlFlowPass::Branch(MachineInstr &MI) { - MachineBasicBlock *Next = MI.getParent()->getNextNode(); - MachineBasicBlock *Target = MI.getOperand(0).getMBB(); - if (Target == Next) + if (MI.getOperand(0).getMBB() == MI.getParent()->getNextNode()) MI.eraseFromParent(); - else - assert(0); + + // If these aren't equal, this is probably an infinite loop. } void SILowerControlFlowPass::Kill(MachineInstr &MI) { - MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); - - // Kill is only allowed in pixel / geometry shaders - assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType == - ShaderType::PIXEL || - MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType == - ShaderType::GEOMETRY); - - // Clear this pixel from the exec mask if the operand is negative - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) - .addImm(0) - .addOperand(MI.getOperand(0)); + const MachineOperand &Op = MI.getOperand(0); + +#ifndef NDEBUG + const SIMachineFunctionInfo *MFI + = MBB.getParent()->getInfo<SIMachineFunctionInfo>(); + // Kill is only allowed in pixel / geometry shaders. + assert(MFI->getShaderType() == ShaderType::PIXEL || + MFI->getShaderType() == ShaderType::GEOMETRY); +#endif + + // Clear this thread from the exec mask if the operand is negative + if ((Op.isImm() || Op.isFPImm())) { + // Constant operand: Set exec mask to 0 or do nothing + if (Op.isImm() ? (Op.getImm() & 0x80000000) : + Op.getFPImm()->isNegative()) { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addImm(0); + } + } else { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) + .addImm(0) + .addOperand(Op); + } MI.eraseFromParent(); } +/// The m0 register stores the maximum allowable address for LDS reads and +/// writes. Its value must be at least the size in bytes of LDS allocated by +/// the shader. For simplicity, we set it to the maximum possible value. +void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) { + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), + AMDGPU::M0).addImm(0xffffffff); +} + void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { MachineBasicBlock &MBB = *MI.getParent(); @@ -342,51 +344,57 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(Idx); MBB.insert(I, MovRel); - MI.eraseFromParent(); - return; - } + } else { - assert(AMDGPU::SReg_64RegClass.contains(Save)); - assert(AMDGPU::VReg_32RegClass.contains(Idx)); + assert(AMDGPU::SReg_64RegClass.contains(Save)); + assert(AMDGPU::VReg_32RegClass.contains(Idx)); - // Save the EXEC mask - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) - .addReg(AMDGPU::EXEC); + // Save the EXEC mask + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) + .addReg(AMDGPU::EXEC); - // Read the next variant into VCC (lower 32 bits) <- also loop target - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC) - .addReg(Idx); + // Read the next variant into VCC (lower 32 bits) <- also loop target + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + AMDGPU::VCC_LO) + .addReg(Idx); - // Move index from VCC into M0 - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) - .addReg(AMDGPU::VCC); + // Move index from VCC into M0 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(AMDGPU::VCC_LO); - // Compare the just read M0 value to all possible Idx values - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) - .addReg(AMDGPU::M0) - .addReg(Idx); + // Compare the just read M0 value to all possible Idx values + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) + .addReg(AMDGPU::M0) + .addReg(Idx); - // Update EXEC, save the original EXEC value to VCC - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) - .addReg(AMDGPU::VCC); + // Update EXEC, save the original EXEC value to VCC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) + .addReg(AMDGPU::VCC); - // Do the actual move - MBB.insert(I, MovRel); + // Do the actual move + MBB.insert(I, MovRel); - // Update EXEC, switch all done bits to 0 and all todo bits to 1 - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) - .addReg(AMDGPU::EXEC) - .addReg(AMDGPU::VCC); + // Update EXEC, switch all done bits to 0 and all todo bits to 1 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); - // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(-7) - .addReg(AMDGPU::EXEC); + // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(-7) + .addReg(AMDGPU::EXEC); - // Restore EXEC - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) - .addReg(Save); + // Restore EXEC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addReg(Save); + } + // FIXME: Are there any values other than the LDS address clamp that need to + // be stored in the m0 register and may be live for more than a few + // instructions? If so, we should save the m0 register at the beginning + // of this function and restore it here. + // FIXME: Add support for LDS direct loads. + InitM0ForLDS(&MI); MI.eraseFromParent(); } @@ -434,8 +442,8 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { } bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo()); + TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo()); SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); bool HaveKill = false; @@ -447,12 +455,12 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next) { + MachineBasicBlock::iterator I, Next; + for (I = MBB.begin(); I != MBB.end(); I = Next) { + Next = std::next(I); - Next = llvm::next(I); MachineInstr &MI = *I; - if (isDS(MI.getOpcode())) { + if (TII->isDS(MI.getOpcode())) { NeedM0 = true; NeedWQM = true; } @@ -531,11 +539,10 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock &MBB = MF.front(); // Initialize M0 to a value that won't cause LDS access to be discarded // due to offset clamping - BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32), - AMDGPU::M0).addImm(0xffffffff); + InitM0ForLDS(MBB.getFirstNonPHI()); } - if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) { + if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) { MachineBasicBlock &MBB = MF.front(); BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC).addReg(AMDGPU::EXEC); |