Diffstat (limited to 'contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp')
-rw-r--r--  contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp  172
1 file changed, 168 insertions(+), 4 deletions(-)
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 6d64345..b13f6b6 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -66,7 +66,6 @@
// C = copy A <-- same-bank copy
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -77,8 +76,10 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -119,6 +120,14 @@ static cl::opt<unsigned> RewritePHILimit(
"rewrite-phi-limit", cl::Hidden, cl::init(10),
cl::desc("Limit the length of PHI chains to lookup"));
+// Limit the length of recurrence chain when evaluating the benefit of
+// commuting operands.
+static cl::opt<unsigned> MaxRecurrenceChain(
+ "recurrence-chain-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum length of recurrence chain when evaluating the benefit "
+ "of commuting operands"));
+
+
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
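
Since the new limit above is registered via cl::opt, it should be tunable from any LLVM tool that parses internal options; a hypothetical invocation (assuming an assertions-enabled build for -debug-only output, and this file's "peephole-opt" debug type) could look like:

    llc -recurrence-chain-limit=5 -debug-only=peephole-opt input.ll -o /dev/null

raising the default cap of 3 on the recurrence chains that the commuting heuristic added later in this patch will follow.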
@@ -131,12 +140,14 @@ STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
namespace {
class ValueTrackerResult;
+ class RecurrenceInstr;
class PeepholeOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
MachineDominatorTree *DT; // Machine dominator tree
+ MachineLoopInfo *MLI;
public:
static char ID; // Pass identification
@@ -150,6 +161,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
if (Aggressive) {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -160,6 +173,9 @@ namespace {
typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
RewriteMapTy;
+ /// \brief Sequence of instructions that formulate a recurrence cycle.
+ typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle;
+
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@@ -170,6 +186,7 @@ namespace {
bool optimizeCoalescableCopy(MachineInstr *MI);
bool optimizeUncoalescableCopy(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool optimizeRecurrence(MachineInstr &PHI);
bool findNextSource(unsigned Reg, unsigned SubReg,
RewriteMapTy &RewriteMap);
bool isMoveImmediate(MachineInstr *MI,
@@ -178,6 +195,13 @@ namespace {
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ /// \brief Finds recurrence cycles, but only ones that are formulated
+ /// around a def operand and a use operand that are tied. If there is a
+ /// use operand commutable with the tied use operand, find the recurrence
+ /// cycle along that operand as well.
+ bool findTargetRecurrence(unsigned Reg,
+ const SmallSet<unsigned, 2> &TargetReg,
+ RecurrenceCycle &RC);
/// \brief If copy instruction \p MI is a virtual register copy, track it in
/// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
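
To make this declaration concrete, a walk-through on the phi example that appears later in this patch (hypothetical registers taken from that comment): optimizeRecurrence seeds TargetReg with the phi's source registers {%vreg0, %vreg100} and calls this function on the phi's destination %vreg1. %vreg1 is not in the set, so the search follows its single non-debug use, the ADD in the loop latch. There %vreg1 sits in a use slot that is not tied to the def but is commutable with the tied slot occupied by %vreg2, so the ADD is recorded along with the pair of operand indices to swap. The search then recurses on the ADD's def, %vreg0, which is in TargetReg, closing the cycle and returning true.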
@@ -222,6 +246,28 @@ namespace {
}
};
+ /// \brief Helper class to hold instructions that are inside recurrence
+ /// cycles. The recurrence cycle is formulated around 1) a def operand and its
+ /// tied use operand, or 2) a def operand and a use operand that is commutable
+ /// with another use operand which is tied to the def operand. In the latter
+ /// case, the indices of the tied use operand and the commutable use operand
+ /// are maintained in CommutePair.
+ class RecurrenceInstr {
+ public:
+ typedef std::pair<unsigned, unsigned> IndexPair;
+
+ RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
+ RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
+ : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
+
+ MachineInstr *getMI() const { return MI; }
+ Optional<IndexPair> getCommutePair() const { return CommutePair; }
+
+ private:
+ MachineInstr *MI;
+ Optional<IndexPair> CommutePair;
+ };
+
/// \brief Helper class to hold a reply for ValueTracker queries. Contains the
/// returned sources for a given search and the instructions where the sources
/// were tracked from.
@@ -412,6 +458,7 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
@@ -1487,6 +1534,113 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
return false;
}
+/// \brief Returns true if \p MO is a virtual register operand.
+static bool isVirtualRegisterOperand(MachineOperand &MO) {
+ if (!MO.isReg())
+ return false;
+ return TargetRegisterInfo::isVirtualRegister(MO.getReg());
+}
+
+bool PeepholeOptimizer::findTargetRecurrence(
+ unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
+ RecurrenceCycle &RC) {
+ // Recurrence found if Reg is in TargetRegs.
+ if (TargetRegs.count(Reg))
+ return true;
+
+ // TODO: Currently, we only allow the last instruction of the recurrence
+ // cycle (the instruction that feeds the PHI instruction) to have more than
+ // one use, to guarantee that commuting operands does not tie registers
+ // with overlapping live ranges. Once we have actual live range info of
+ // each register, this constraint can be relaxed.
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ // Give up if the recurrence chain length is longer than the limit.
+ if (RC.size() >= MaxRecurrenceChain)
+ return false;
+
+ MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
+ unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
+
+ // Only interested in recurrences whose instructions have only one def, which
+ // is a virtual register.
+ if (MI.getDesc().getNumDefs() != 1)
+ return false;
+
+ MachineOperand &DefOp = MI.getOperand(0);
+ if (!isVirtualRegisterOperand(DefOp))
+ return false;
+
+ // Check if the def operand of MI is tied to any use operand. We are only
+ // interested in the case where all the instructions in the recurrence chain
+ // have their def operand tied to one of the use operands.
+ unsigned TiedUseIdx;
+ if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
+ return false;
+
+ if (Idx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ } else {
+ // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
+ unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ }
+ }
+
+ return false;
+}
+
+/// \brief Phi instructions will eventually be lowered to copy instructions.
+/// If a phi is in a loop header, a recurrence may be formulated around the
+/// source and destination of the phi. In such a case, commuting operands of
+/// the instructions in the recurrence may enable coalescing of the copy
+/// instruction generated from the phi. For example, if there is a recurrence
+/// of
+///
+/// LoopHeader:
+/// %vreg1 = phi(%vreg0, %vreg100)
+/// LoopLatch:
+/// %vreg0<def, tied1> = ADD %vreg2<tied0>, %vreg1
+///
+/// then the fact that %vreg0 and %vreg2 are in the same tied operands set
+/// makes coalescing the copy instruction generated from the phi in
+/// LoopHeader (i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and
+/// %vreg2 have overlapping live ranges. This introduces an additional move
+/// instruction in the final assembly. However, if we commute %vreg2 and
+/// %vreg1 of the ADD instruction, the redundant move instruction can be
+/// avoided.
+bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
+ SmallSet<unsigned, 2> TargetRegs;
+ for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
+ MachineOperand &MO = PHI.getOperand(Idx);
+ assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
+ TargetRegs.insert(MO.getReg());
+ }
+
+ bool Changed = false;
+ RecurrenceCycle RC;
+ if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
+ // Commute operands of instructions in RC where necessary so that the copy
+ // to be generated from the PHI can be coalesced.
+ DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
+ for (auto &RI : RC) {
+ DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
+ auto CP = RI.getCommutePair();
+ if (CP) {
+ Changed = true;
+ TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
+ (*CP).second);
+ DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
+ }
+ }
+ }
+
+ return Changed;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
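
As an illustration of what a successful optimizeRecurrence run does to the loop latch from the comment above (same hypothetical registers; the operand notation loosely mirrors that comment rather than exact printer output):

    Before: %vreg0<def, tied1> = ADD %vreg2<tied0>, %vreg1
            ; %vreg0 is tied to %vreg2, whose live range overlaps %vreg1,
            ; so the phi's copy (%vreg1 = COPY %vreg0) cannot coalesce

    After:  %vreg0<def, tied1> = ADD %vreg1<tied0>, %vreg2
            ; commuteInstruction swapped the two uses; the def is now tied
            ; to %vreg1, so the phi's copy can be coalesced away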
@@ -1501,6 +1655,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
+ MLI = &getAnalysis<MachineLoopInfo>();
bool Changed = false;
@@ -1529,6 +1684,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallSet<unsigned, 4> CopySrcRegs;
DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+ bool IsLoopHeader = MLI->isLoopHeader(&MBB);
+
for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
MII != MIE; ) {
MachineInstr *MI = &*MII;
@@ -1540,9 +1697,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->isDebugValue())
continue;
- if (MI->isPosition() || MI->isPHI())
+ if (MI->isPosition())
continue;
continue;
+ if (IsLoopHeader && MI->isPHI()) {
+ if (optimizeRecurrence(*MI)) {
+ Changed = true;
+ continue;
+ }
+ }
+
if (!MI->isCopy()) {
for (const auto &Op : MI->operands()) {
// Visit all operands: definitions can be implicit or explicit.
@@ -1667,7 +1831,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MRI->markUsesInDebugValueAsUndef(FoldedReg);
FoldAsLoadDefCandidates.erase(FoldedReg);
++NumLoadFold;
-
+
// MI is replaced with FoldMI so we can continue trying to fold
Changed = true;
MI = FoldMI;
@@ -1675,7 +1839,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
}
}
}
-
+
// If we run into an instruction we can't fold across, discard
// the load candidates. Note: We might be able to fold *into* this
// instruction, so this needs to be after the folding logic.