Diffstat (limited to 'contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp')
-rw-r--r--   contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp   136
1 file changed, 103 insertions(+), 33 deletions(-)
diff --git a/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
index 745c4b6..f91d117 100644
--- a/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
@@ -10,11 +10,13 @@
 //
 #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
@@ -24,6 +26,8 @@
 
 STATISTIC(NumInstructionsShrunk,
           "Number of 64-bit instruction reduced to 32-bit.");
+STATISTIC(NumLiteralConstantsFolded,
+          "Number of literal constants folded into 32-bit instructions.");
 
 namespace llvm {
   void initializeSIShrinkInstructionsPass(PassRegistry&);
@@ -41,13 +45,13 @@ public:
   SIShrinkInstructions() : MachineFunctionPass(ID) {
   }
 
-  virtual bool runOnMachineFunction(MachineFunction &MF) override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
 
-  virtual const char *getPassName() const override {
+  const char *getPassName() const override {
     return "SI Shrink Instructions";
   }
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -90,29 +94,83 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
   const MachineOperand *Src1Mod =
       TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
 
-  if (Src1 && (!isVGPR(Src1, TRI, MRI) || Src1Mod->getImm() != 0))
+  if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
     return false;
 
-  // We don't need to check src0, all input types are legal, so just make
-  // sure src0 isn't using any modifiers.
-  const MachineOperand *Src0Mod =
-      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
-  if (Src0Mod && Src0Mod->getImm() != 0)
+  // We don't need to check src0, all input types are legal, so just make sure
+  // src0 isn't using any modifiers.
+  if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
     return false;
 
   // Check output modifiers
-  const MachineOperand *Omod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
-  if (Omod && Omod->getImm() != 0)
+  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
     return false;
 
-  const MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
-  return !Clamp || Clamp->getImm() == 0;
+  if (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+    return false;
+
+  return true;
+}
+
+/// \brief This function checks \p MI for operands defined by a move immediate
+/// instruction and then folds the literal constant into the instruction if it
+/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
+/// and will only fold literal constants if we are still in SSA.
+static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
+                           MachineRegisterInfo &MRI, bool TryToCommute = true) {
+
+  if (!MRI.isSSA())
+    return;
+
+  assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
+         TII->isVOPC(MI.getOpcode()));
+
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+
+  // Only one literal constant is allowed per instruction, so if src0 is a
+  // literal constant then we can't do any folding.
+  if (Src0->isImm() && TII->isLiteralConstant(*Src0))
+    return;
+
+
+  // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
+  // SGPR, we cannot commute the instruction, so we can't fold any literal
+  // constants.
+  if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
+    return;
+
+  // Try to fold Src0
+  if (Src0->isReg()) {
+    unsigned Reg = Src0->getReg();
+    MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+    if (Def && Def->isMoveImmediate()) {
+      MachineOperand &MovSrc = Def->getOperand(1);
+      bool ConstantFolded = false;
+
+      if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+        Src0->ChangeToImmediate(MovSrc.getImm());
+        ConstantFolded = true;
+      }
+      if (ConstantFolded) {
+        if (MRI.use_empty(Reg))
+          Def->eraseFromParent();
+        ++NumLiteralConstantsFolded;
+        return;
+      }
+    }
+  }
+
+  // We have failed to fold src0, so commute the instruction and try again.
+  if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(&MI))
+    foldImmediates(MI, TII, MRI, false);
+
 }
 
 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
-      MF.getTarget().getInstrInfo());
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
 
   std::vector<unsigned> I1Defs;
@@ -125,11 +183,23 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
       Next = std::next(I);
       MachineInstr &MI = *I;
 
+      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
+      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
+        const MachineOperand &Src = MI.getOperand(1);
+
+        if (Src.isImm()) {
+          if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src))
+            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+        }
+
+        continue;
+      }
+
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;
 
      if (!canShrink(MI, TII, TRI, MRI)) {
-        // Try commtuing the instruction and see if that enables us to shrink
+        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
            !canShrink(MI, TII, TRI, MRI))
@@ -147,18 +217,17 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register. We can't
-          // force them to use VCC here, because the register allocator
-          // has trouble with sequences like this, which cause the allocator
-          // to run out of registes if vreg0 and vreg1 belong to the VCCReg
-          // register class:
+          // force them to use VCC here, because the register allocator has
+          // trouble with sequences like this, which cause the allocator to run
+          // out of registers if vreg0 and vreg1 belong to the VCCReg register
+          // class:
          // vreg0 = VOPC;
          // vreg1 = VOPC;
          // S_AND_B64 vreg0, vreg1
          //
-          // So, instead of forcing the instruction to write to VCC, we provide a
-          // hint to the register allocator to use VCC and then we
-          // we will run this pass again after RA and shrink it if it outpus to
-          // VCC.
+          // So, instead of forcing the instruction to write to VCC, we provide
+          // a hint to the register allocator to use VCC and then we will run
+          // this pass again after RA and shrink it if it outputs to VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
          continue;
        }
@@ -167,27 +236,28 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
      }
 
      // We can shrink this instruction
-      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << "\n";);
+      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
 
-      MachineInstrBuilder MIB =
+      MachineInstrBuilder Inst32 =
          BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
 
      // dst
-      MIB.addOperand(MI.getOperand(0));
+      Inst32.addOperand(MI.getOperand(0));
 
-      MIB.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+      Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
 
      const MachineOperand *Src1 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src1);
      if (Src1)
-        MIB.addOperand(*Src1);
-
-      for (const MachineOperand &MO : MI.implicit_operands())
-        MIB.addOperand(MO);
+        Inst32.addOperand(*Src1);
 
-      DEBUG(dbgs() << "e32 MI = "; MI.dump(); dbgs() << "\n";);
      ++NumInstructionsShrunk;
      MI.eraseFromParent();
+
+      foldImmediates(*Inst32, TII, MRI);
+      DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
+
    }
  }
  return false;
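
A few hedged illustrations of what this patch does, written as small standalone C++ toys rather than against the real LLVM API; every type and helper name below is invented for illustration. First, the canShrink() idea: the 32-bit (e32) VOP encodings carry no bits for source modifiers, output modifiers, or clamping, and their src1 field can only name a VGPR, so an instruction qualifies for shrinking only when all of those features are unused.

// Hedged toy model of the canShrink() checks; ToyOp is an invented
// stand-in, not an LLVM type.
#include <cstdio>

struct ToyOp {
  bool Src1IsVGPR;         // e32 src1 can only encode a VGPR
  int Src0Mods, Src1Mods;  // abs/neg modifier bits; 0 means none set
  int OMod;                // output multiplier; 0 means none
  bool Clamp;
};

static bool canShrink(const ToyOp &Op) {
  if (!Op.Src1IsVGPR || Op.Src1Mods != 0)
    return false;  // src1 constraint of the 32-bit encoding
  if (Op.Src0Mods != 0)
    return false;  // e32 has no source-modifier bits at all
  if (Op.OMod != 0 || Op.Clamp)
    return false;  // ...and no omod/clamp bits either
  return true;
}

int main() {
  ToyOp Plain{true, 0, 0, 0, false};
  ToyOp Clamped{true, 0, 0, 0, true};
  std::printf("plain: %d  clamped: %d\n", canShrink(Plain), canShrink(Clamped));
}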
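Next, the foldImmediates() flow added above, as a toy that mirrors only the decision order: bail out if src0 already holds a literal constant (only one literal is allowed per instruction), bail out if src0 is an SGPR (literals and SGPRs are both restricted to the src0 slot, so commuting cannot open it), otherwise chase src0's unique definition and, if it is a move-immediate, fold the value into the use, retrying once after commuting the operands. Operand, Instr, and the inline-constant range used here are assumptions and stand-ins, not the real MachineOperand/MachineRegisterInfo API.

// Hedged toy model of the literal-folding walk; all types are invented.
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

enum class Kind { VGPR, SGPR, Imm };

struct Operand {
  Kind K;
  int64_t Val;  // register id, or immediate value when K == Imm
  bool isImm() const { return K == Kind::Imm; }
  bool isVGPR() const { return K == Kind::VGPR; }
};

struct Instr {
  bool MoveImm;   // the defining instruction is a move-immediate
  int64_t ImmVal;
};

// Assumption: only values outside the inline range consume a literal slot.
static bool isLiteral(int64_t V) { return V < -16 || V > 64; }

// Returns true if a move-immediate behind src0 (or, after one commute,
// behind src1) was folded into the use.
static bool foldImmediates(Operand &Src0, Operand &Src1,
                           const std::vector<Instr> &Defs,
                           bool TryToCommute = true) {
  // One literal per instruction: src0 already burning it blocks folding.
  if (Src0.isImm() && isLiteral(Src0.Val))
    return false;

  // Literals and SGPRs may only sit in src0; an SGPR there means commuting
  // cannot free the slot, so give up.
  if (!Src0.isImm() && !Src0.isVGPR())
    return false;

  if (!Src0.isImm()) {
    const Instr &Def = Defs[(size_t)Src0.Val];  // chase the unique def
    if (Def.MoveImm) {
      Src0 = {Kind::Imm, Def.ImmVal};           // fold the constant in
      return true;
    }
  }

  // Folding src0 failed: commute once and try the other operand.
  if (TryToCommute) {
    std::swap(Src0, Src1);
    return foldImmediates(Src0, Src1, Defs, false);
  }
  return false;
}

int main() {
  // Toy program: reg0 = mov 0x1234 (a literal), reg1 = some computation.
  // The use is (src0 = reg1, src1 = reg0), so folding must commute first.
  std::vector<Instr> Defs = {{true, 0x1234}, {false, 0}};
  Operand Src0{Kind::VGPR, 1}, Src1{Kind::VGPR, 0};
  bool Folded = foldImmediates(Src0, Src1, Defs);
  std::printf("folded=%d src0 imm=%#llx\n", Folded,
              Folded ? (long long)Src0.Val : 0LL);
}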
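Finally, the new S_MOVK_I32 rewrite: an S_MOV_B32 whose literal is not an inline constant spends an extra 32-bit dword on the value, while S_MOVK_I32 packs a 16-bit signed immediate into the instruction word itself, which is the 4-byte saving the comment mentions. A sketch of that eligibility test, assuming the SI rule that integer inline constants cover -16..64 (the real isInlineConstant() also accepts certain floating-point values):

// Hedged sketch, not the pass itself: models when rewriting
// S_MOV_B32 -> S_MOVK_I32 actually saves encoding bytes.
#include <cstdint>
#include <cstdio>

// Assumption: SI integer inline constants span -16..64 and are free to
// encode; anything else rides in an extra 32-bit literal dword.
static bool isIntegerInlineConstant(int64_t Imm) {
  return Imm >= -16 && Imm <= 64;
}

static bool fitsSigned16(int64_t Imm) {
  return Imm >= INT16_MIN && Imm <= INT16_MAX;  // mirrors isInt<16>(Imm)
}

// S_MOVK_I32 only helps when the value needs a literal dword today
// (not inline) but fits the 16-bit simm field of the SOPK format.
static bool shouldUseMovk(int64_t Imm) {
  return fitsSigned16(Imm) && !isIntegerInlineConstant(Imm);
}

int main() {
  const int64_t Tests[] = {7, 64, 65, -17, 0x1234, 0x12345};
  for (int64_t Imm : Tests)
    std::printf("%lld -> %s\n", (long long)Imm,
                shouldUseMovk(Imm) ? "S_MOVK_I32" : "keep S_MOV_B32");
}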