author     dim <dim@FreeBSD.org>  2014-11-24 17:02:24 +0000
committer  dim <dim@FreeBSD.org>  2014-11-24 17:02:24 +0000
commit     2c8643c6396b0a3db33430cf9380e70bbb9efce0 (patch)
tree       4df130b28021d86e13bf4565ef58c1c5a5e093b4 /contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
parent     678318cd20f7db4e6c6b85d83fe00fa327b04fca (diff)
parent     e27feadae0885aa074df58ebfda2e7a7f7a7d590 (diff)
Merge llvm 3.5.0 release from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Diffstat (limited to 'contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp')
-rw-r--r--  contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp  173
1 file changed, 90 insertions, 83 deletions
diff --git a/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
index ef867d3..75b5a5e 100644
--- a/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
+++ b/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
@@ -55,6 +55,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
using namespace llvm;
@@ -66,8 +67,8 @@ private:
static const unsigned SkipThreshold = 12;
static char ID;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+ const SIInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -85,17 +86,18 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
+ void InitM0ForLDS(MachineBasicBlock::iterator MI);
void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
public:
SILowerControlFlowPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TRI(0), TII(0) { }
+ MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { }
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const {
+ const char *getPassName() const override {
return "SI Lower control flow instructions";
}
@@ -109,23 +111,6 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
return new SILowerControlFlowPass(tm);
}
-static bool isDS(unsigned Opcode) {
- switch(Opcode) {
- default: return false;
- case AMDGPU::DS_ADD_U32_RTN:
- case AMDGPU::DS_SUB_U32_RTN:
- case AMDGPU::DS_WRITE_B32:
- case AMDGPU::DS_WRITE_B8:
- case AMDGPU::DS_WRITE_B16:
- case AMDGPU::DS_READ_B32:
- case AMDGPU::DS_READ_I8:
- case AMDGPU::DS_READ_U8:
- case AMDGPU::DS_READ_I16:
- case AMDGPU::DS_READ_U16:
- return true;
- }
-}
-
bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
MachineBasicBlock *To) {
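
Note: the deleted opcode switch moves into SIInstrInfo, which is why this diff retypes TII to const SIInstrInfo * and later calls TII->isDS(). A minimal sketch of such a predicate, assuming a per-instruction DS target flag; the flag name SIInstrFlags::DS and the exact signature are assumptions, not taken from this diff:

    // Hypothetical sketch: test a target flag instead of enumerating opcodes,
    // so newly added DS instructions are covered automatically.
    bool SIInstrInfo::isDS(uint16_t Opcode) const {
      return get(Opcode).TSFlags & SIInstrFlags::DS;
    }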
@@ -162,7 +147,7 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType !=
+ if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getShaderType() !=
ShaderType::PIXEL ||
!shouldSkip(&MBB, &MBB.getParent()->back()))
return;
@@ -302,33 +287,50 @@ void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
}
void SILowerControlFlowPass::Branch(MachineInstr &MI) {
- MachineBasicBlock *Next = MI.getParent()->getNextNode();
- MachineBasicBlock *Target = MI.getOperand(0).getMBB();
- if (Target == Next)
+ if (MI.getOperand(0).getMBB() == MI.getParent()->getNextNode())
MI.eraseFromParent();
- else
- assert(0);
+
+ // If these aren't equal, this is probably an infinite loop.
}
void SILowerControlFlowPass::Kill(MachineInstr &MI) {
-
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
-
- // Kill is only allowed in pixel / geometry shaders
- assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
- ShaderType::PIXEL ||
- MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
- ShaderType::GEOMETRY);
-
- // Clear this pixel from the exec mask if the operand is negative
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
- .addImm(0)
- .addOperand(MI.getOperand(0));
+ const MachineOperand &Op = MI.getOperand(0);
+
+#ifndef NDEBUG
+ const SIMachineFunctionInfo *MFI
+ = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ // Kill is only allowed in pixel / geometry shaders.
+ assert(MFI->getShaderType() == ShaderType::PIXEL ||
+ MFI->getShaderType() == ShaderType::GEOMETRY);
+#endif
+
+ // Clear this thread from the exec mask if the operand is negative
+ if ((Op.isImm() || Op.isFPImm())) {
+ // Constant operand: Set exec mask to 0 or do nothing
+ if (Op.isImm() ? (Op.getImm() & 0x80000000) :
+ Op.getFPImm()->isNegative()) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addImm(0);
+ }
+ } else {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+ .addImm(0)
+ .addOperand(Op);
+ }
MI.eraseFromParent();
}
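
Note on the rewritten Kill(): a compile-time-known operand is folded: a negative constant clears all of EXEC with a single scalar move, a non-negative constant makes the kill a no-op, and only dynamic operands still emit a per-lane V_CMPX_LE_F32. This fold is also why the diff adds the llvm/IR/Constants.h include: Op.getFPImm() returns a ConstantFP. The decision in isolation, with a hypothetical helper name:

    // Hypothetical helper mirroring the fold above: true when a constant kill
    // operand is negative, i.e. every active lane would be killed anyway.
    static bool killsAllLanes(const MachineOperand &Op) {
      if (Op.isImm())
        return Op.getImm() & 0x80000000;    // sign bit of the f32 bit pattern
      if (Op.isFPImm())
        return Op.getFPImm()->isNegative();
      return false;  // dynamic operand: caller emits V_CMPX_LE_F32 instead
    }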
+/// The m0 register stores the maximum allowable address for LDS reads and
+/// writes. Its value must be at least the size in bytes of LDS allocated by
+/// the shader. For simplicity, we set it to the maximum possible value.
+void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) {
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
+ AMDGPU::M0).addImm(0xffffffff);
+}
+
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -342,51 +344,57 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addReg(Idx);
MBB.insert(I, MovRel);
- MI.eraseFromParent();
- return;
- }
+ } else {
- assert(AMDGPU::SReg_64RegClass.contains(Save));
- assert(AMDGPU::VReg_32RegClass.contains(Idx));
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
- // Save the EXEC mask
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
- .addReg(AMDGPU::EXEC);
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
- // Read the next variant into VCC (lower 32 bits) <- also loop target
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
- .addReg(Idx);
+ // Read the next variant into VCC (lower 32 bits) <- also loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ AMDGPU::VCC_LO)
+ .addReg(Idx);
- // Move index from VCC into M0
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(AMDGPU::VCC);
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC_LO);
- // Compare the just read M0 value to all possible Idx values
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
- .addReg(AMDGPU::M0)
- .addReg(Idx);
+ // Compare the just read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
- // Update EXEC, save the original EXEC value to VCC
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
- .addReg(AMDGPU::VCC);
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
- // Do the actual move
- MBB.insert(I, MovRel);
+ // Do the actual move
+ MBB.insert(I, MovRel);
- // Update EXEC, switch all done bits to 0 and all todo bits to 1
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(AMDGPU::VCC);
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
- // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(-7)
- .addReg(AMDGPU::EXEC);
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
- // Restore EXEC
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
- .addReg(Save);
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+ }
+ // FIXME: Are there any values other than the LDS address clamp that need to
+ // be stored in the m0 register and may be live for more than a few
+ // instructions? If so, we should save the m0 register at the beginning
+ // of this function and restore it here.
+ // FIXME: Add support for LDS direct loads.
+ InitM0ForLDS(&MI);
MI.eraseFromParent();
}
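
Note: the else branch above is the standard waterfall loop for a divergent index. Each iteration services every lane that shares the first active lane's index value until none remain, then EXEC is restored. Conceptual per-wave semantics (illustrative pseudocode only; firstActiveLane, lanesEqual, idx and movrel are hypothetical stand-ins, not backend API):

    uint64_t save = exec;                        // S_MOV_B64 Save, EXEC
    do {
      uint32_t m0 = idx[firstActiveLane(exec)];  // V_READFIRSTLANE_B32 + S_MOV_B32
      uint64_t eq = lanesEqual(idx, m0);         // V_CMP_EQ_U32 -> VCC
      uint64_t todo = exec;                      // S_AND_SAVEEXEC_B64: VCC = EXEC,
      exec &= eq;                                //   then EXEC &= old VCC
      movrel();                                  // the indirect move, matching lanes only
      exec = todo ^ exec;                        // S_XOR_B64: retire serviced lanes
    } while (exec != 0);                         // S_CBRANCH_EXECNZ back to the read
    exec = save;                                 // S_MOV_B64 EXEC, Save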
@@ -434,8 +442,8 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
+ TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
@@ -447,12 +455,12 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next) {
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
- Next = llvm::next(I);
MachineInstr &MI = *I;
- if (isDS(MI.getOpcode())) {
+ if (TII->isDS(MI.getOpcode())) {
NeedM0 = true;
NeedWQM = true;
}
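
Note: the loop restructuring keeps the erase-safe traversal shape while switching from LLVM's pre-C++11 llvm::next to std::next: the successor iterator is captured before the body runs, so erasing the current instruction cannot invalidate the walk. The pattern in isolation (a generic sketch; visit is a hypothetical callback):

    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);  // capture first: the body may erase *I
      MachineInstr &MI = *I;
      visit(MI);            // may call MI.eraseFromParent() safely
    }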
@@ -531,11 +539,10 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
// Initialize M0 to a value that won't cause LDS access to be discarded
// due to offset clamping
- BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32),
- AMDGPU::M0).addImm(0xffffffff);
+ InitM0ForLDS(MBB.getFirstNonPHI());
}
- if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
+ if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC).addReg(AMDGPU::EXEC);
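
Note: S_WQM_B64 on EXEC enables whole quad mode: any 2x2 pixel quad with at least one live lane becomes fully live, which pixel shaders need so derivative and LOD calculations can read all four quad members. Illustrative semantics of the mask transform (a sketch, not compiler code):

    // Whole-quad-mode mask: each aligned group of four lanes turns all-live
    // if any one of its lanes was live.
    uint64_t wholeQuadMode(uint64_t exec) {
      uint64_t out = 0;
      for (unsigned q = 0; q < 64; q += 4)
        if ((exec >> q) & 0xFull)
          out |= 0xFull << q;
      return out;
    }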