summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp439
1 files changed, 259 insertions, 180 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 91af4a1..c7f9122 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -12,6 +12,7 @@
// counts of loops easily.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -19,11 +20,10 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -32,6 +32,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <climits>
@@ -45,16 +46,14 @@ static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::Hidden,
cl::desc("The baseline cost threshold for loop unrolling"));
-static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
- "unroll-percent-dynamic-cost-saved-threshold", cl::init(50), cl::Hidden,
- cl::desc("The percentage of estimated dynamic cost which must be saved by "
- "unrolling to allow unrolling up to the max threshold."));
-
-static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
- "unroll-dynamic-cost-savings-discount", cl::init(100), cl::Hidden,
- cl::desc("This is the amount discounted from the total unroll cost when "
- "the unrolled form has a high dynamic cost savings (triggered by "
- "the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."));
+static cl::opt<unsigned> UnrollMaxPercentThresholdBoost(
+ "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
+ cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
+ "to the threshold when aggressively unrolling a loop due to the "
+ "dynamic cost savings. If completely unrolling a loop will reduce "
+ "the total runtime from X to Y, we boost the loop unroll "
+ "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
+ "X/Y). This limit avoids excessive code bloat."));
static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
"unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
@@ -90,43 +89,59 @@ static cl::opt<bool>
UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
+static cl::opt<unsigned> UnrollMaxUpperBound(
+ "unroll-max-upperbound", cl::init(8), cl::Hidden,
+ cl::desc(
+ "The max of trip count upper bound that is considered in unrolling"));
+
static cl::opt<unsigned> PragmaUnrollThreshold(
"pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
cl::desc("Unrolled size limit for loops with an unroll(full) or "
"unroll_count pragma."));
+static cl::opt<unsigned> FlatLoopTripCountThreshold(
+ "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
+ cl::desc("If the runtime tripcount for the loop is lower than the "
+ "threshold, the loop is considered as flat and will be less "
+ "aggressively unrolled."));
+
+static cl::opt<bool>
+ UnrollAllowPeeling("unroll-allow-peeling", cl::Hidden,
+ cl::desc("Allows loops to be peeled when the dynamic "
+ "trip count is known to be low."));
+
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
-/// Default unroll count for loops with run-time trip count if
-/// -unroll-count is not set
-static const unsigned DefaultUnrollRuntimeCount = 8;
-
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
- Optional<bool> UserRuntime) {
+ Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
UP.Threshold = 150;
- UP.PercentDynamicCostSavedThreshold = 50;
- UP.DynamicCostSavingsDiscount = 100;
+ UP.MaxPercentThresholdBoost = 400;
UP.OptSizeThreshold = 0;
UP.PartialThreshold = UP.Threshold;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
+ UP.PeelCount = 0;
+ UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = UINT_MAX;
UP.FullUnrollMaxCount = UINT_MAX;
+ UP.BEInsns = 2;
UP.Partial = false;
UP.Runtime = false;
UP.AllowRemainder = true;
UP.AllowExpensiveTripCount = false;
UP.Force = false;
+ UP.UpperBound = false;
+ UP.AllowPeeling = false;
// Override with any target specific settings
TTI.getUnrollingPreferences(L, UP);
@@ -142,11 +157,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.Threshold = UnrollThreshold;
UP.PartialThreshold = UnrollThreshold;
}
- if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0)
- UP.PercentDynamicCostSavedThreshold =
- UnrollPercentDynamicCostSavedThreshold;
- if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
- UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
+ if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
+ UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
if (UnrollMaxCount.getNumOccurrences() > 0)
UP.MaxCount = UnrollMaxCount;
if (UnrollFullMaxCount.getNumOccurrences() > 0)
@@ -157,6 +169,10 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.AllowRemainder = UnrollAllowRemainder;
if (UnrollRuntime.getNumOccurrences() > 0)
UP.Runtime = UnrollRuntime;
+ if (UnrollMaxUpperBound == 0)
+ UP.UpperBound = false;
+ if (UnrollAllowPeeling.getNumOccurrences() > 0)
+ UP.AllowPeeling = UnrollAllowPeeling;
// Apply user values provided by argument
if (UserThreshold.hasValue()) {
@@ -169,6 +185,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.Partial = *UserAllowPartial;
if (UserRuntime.hasValue())
UP.Runtime = *UserRuntime;
+ if (UserUpperBound.hasValue())
+ UP.UpperBound = *UserUpperBound;
return UP;
}
@@ -210,11 +228,11 @@ struct UnrolledInstStateKeyInfo {
namespace {
struct EstimatedUnrollCost {
/// \brief The estimated cost after unrolling.
- int UnrolledCost;
+ unsigned UnrolledCost;
/// \brief The estimated dynamic cost of executing the instructions in the
/// rolled form.
- int RolledDynamicCost;
+ unsigned RolledDynamicCost;
};
}
@@ -234,7 +252,7 @@ struct EstimatedUnrollCost {
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
ScalarEvolution &SE, const TargetTransformInfo &TTI,
- int MaxUnrolledLoopSize) {
+ unsigned MaxUnrolledLoopSize) {
// We want to be able to scale offsets by the trip count and add more offsets
// to them without checking for overflows, and we already don't want to
// analyze *massive* trip counts, so we force the max to be reasonably small.
@@ -258,14 +276,14 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// The estimated cost of the unrolled form of the loop. We try to estimate
// this by simplifying as much as we can while computing the estimate.
- int UnrolledCost = 0;
+ unsigned UnrolledCost = 0;
// We also track the estimated dynamic (that is, actually executed) cost in
// the rolled form. This helps identify cases when the savings from unrolling
// aren't just exposing dead control flows, but actual reduced dynamic
// instructions due to the simplifications which we expect to occur after
// unrolling.
- int RolledDynamicCost = 0;
+ unsigned RolledDynamicCost = 0;
// We track the simplification of each instruction in each iteration. We use
// this to recursively merge costs into the unrolled cost on-demand so that
@@ -412,6 +430,9 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// it. We don't change the actual IR, just count optimization
// opportunities.
for (Instruction &I : *BB) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
// Track this instruction's expected baseline cost when executing the
// rolled loop form.
RolledDynamicCost += TTI.getUserCost(&I);
@@ -429,16 +450,16 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
if (IsFree)
continue;
- // If the instruction might have a side-effect recursively account for
- // the cost of it and all the instructions leading up to it.
- if (I.mayHaveSideEffects())
- AddCostRecursively(I, Iteration);
-
// Can't properly model a cost of a call.
// FIXME: With a proper cost model we should be able to do it.
if(isa<CallInst>(&I))
return None;
+ // If the instruction might have a side-effect recursively account for
+ // the cost of it and all the instructions leading up to it.
+ if (I.mayHaveSideEffects())
+ AddCostRecursively(I, Iteration);
+
// If unrolled body turns out to be too big, bail out.
if (UnrolledCost > MaxUnrolledLoopSize) {
DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
@@ -529,7 +550,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable, bool &Convergent,
const TargetTransformInfo &TTI,
- AssumptionCache *AC) {
+ AssumptionCache *AC, unsigned BEInsns) {
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, AC, EphValues);
@@ -548,7 +569,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
// that each loop has at least three instructions (likely a conditional
// branch, a comparison feeding that branch, and some kind of loop increment
// feeding that comparison instruction).
- LoopSize = std::max(LoopSize, 3u);
+ LoopSize = std::max(LoopSize, BEInsns + 1);
return LoopSize;
}
@@ -635,70 +656,38 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
L->setLoopID(NewLoopID);
}
-static bool canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned PercentDynamicCostSavedThreshold,
- unsigned DynamicCostSavingsDiscount,
- uint64_t UnrolledCost,
- uint64_t RolledDynamicCost) {
- if (Threshold == NoThreshold) {
- DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n");
- return true;
- }
-
- if (UnrolledCost <= Threshold) {
- DEBUG(dbgs() << " Can fully unroll, because unrolled cost: "
- << UnrolledCost << "<" << Threshold << "\n");
- return true;
- }
-
- assert(UnrolledCost && "UnrolledCost can't be 0 at this point.");
- assert(RolledDynamicCost >= UnrolledCost &&
- "Cannot have a higher unrolled cost than a rolled cost!");
-
- // Compute the percentage of the dynamic cost in the rolled form that is
- // saved when unrolled. If unrolling dramatically reduces the estimated
- // dynamic cost of the loop, we use a higher threshold to allow more
- // unrolling.
- unsigned PercentDynamicCostSaved =
- (uint64_t)(RolledDynamicCost - UnrolledCost) * 100ull / RolledDynamicCost;
-
- if (PercentDynamicCostSaved >= PercentDynamicCostSavedThreshold &&
- (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <=
- (int64_t)Threshold) {
- DEBUG(dbgs() << " Can fully unroll, because unrolling will reduce the "
- "expected dynamic cost by "
- << PercentDynamicCostSaved << "% (threshold: "
- << PercentDynamicCostSavedThreshold << "%)\n"
- << " and the unrolled cost (" << UnrolledCost
- << ") is less than the max threshold ("
- << DynamicCostSavingsDiscount << ").\n");
- return true;
- }
+// Computes the boosting factor for complete unrolling.
+// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
+// be beneficial to fully unroll the loop even if unrolledcost is large. We
+// use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
+// the unroll threshold.
+static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
+ unsigned MaxPercentThresholdBoost) {
+ if (Cost.RolledDynamicCost >= UINT_MAX / 100)
+ return 100;
+ else if (Cost.UnrolledCost != 0)
+ // The boosting factor is RolledDynamicCost / UnrolledCost
+ return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
+ MaxPercentThresholdBoost);
+ else
+ return MaxPercentThresholdBoost;
+}
- DEBUG(dbgs() << " Too large to fully unroll:\n");
- DEBUG(dbgs() << " Threshold: " << Threshold << "\n");
- DEBUG(dbgs() << " Max threshold: " << DynamicCostSavingsDiscount << "\n");
- DEBUG(dbgs() << " Percent cost saved threshold: "
- << PercentDynamicCostSavedThreshold << "%\n");
- DEBUG(dbgs() << " Unrolled cost: " << UnrolledCost << "\n");
- DEBUG(dbgs() << " Rolled dynamic cost: " << RolledDynamicCost << "\n");
- DEBUG(dbgs() << " Percent cost saved: " << PercentDynamicCostSaved
- << "\n");
- return false;
+// Returns loop size estimation for unrolled loop.
+static uint64_t getUnrolledLoopSize(
+ unsigned LoopSize,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+ assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
+ return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
-static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
- DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE, unsigned TripCount,
- unsigned TripMultiple, unsigned LoopSize,
- TargetTransformInfo::UnrollingPreferences &UP) {
- // BEInsns represents number of instructions optimized when "back edge"
- // becomes "fall through" in unrolled loop.
- // For now we count a conditional branch on a backedge and a comparison
- // feeding it.
- unsigned BEInsns = 2;
+static bool computeUnrollCount(
+ Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
+ unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,
+ TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
@@ -706,8 +695,7 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.Count = UnrollCount;
UP.AllowExpensiveTripCount = true;
UP.Force = true;
- if (UP.AllowRemainder &&
- (LoopSize - BEInsns) * UP.Count + BEInsns < UP.Threshold)
+ if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
return true;
}
@@ -719,13 +707,13 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.AllowExpensiveTripCount = true;
UP.Force = true;
if (UP.AllowRemainder &&
- (LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
+ getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return true;
}
bool PragmaFullUnroll = HasUnrollFullPragma(L);
if (PragmaFullUnroll && TripCount != 0) {
UP.Count = TripCount;
- if ((LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
+ if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return false;
}
@@ -733,11 +721,6 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
- uint64_t UnrolledSize;
- DebugLoc LoopLoc = L->getStartLoc();
- Function *F = L->getHeader()->getParent();
- LLVMContext &Ctx = F->getContext();
-
if (ExplicitUnroll && TripCount != 0) {
// If the loop has an unrolling pragma, we want to be more aggressive with
// unrolling limits. Set thresholds to at least the PragmaThreshold value
@@ -748,38 +731,48 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
}
// 3rd priority is full unroll count.
- // Full unroll make sense only when TripCount could be staticaly calculated.
+ // Full unroll makes sense only when TripCount or its upper bound could be
+ // statically calculated.
// Also we need to check if we exceed FullUnrollMaxCount.
- if (TripCount && TripCount <= UP.FullUnrollMaxCount) {
+ // If using the upper bound to unroll, TripMultiple should be set to 1 because
+ // we do not know when loop may exit.
+ // MaxTripCount and ExactTripCount cannot both be non zero since we only
+ // compute the former when the latter is zero.
+ unsigned ExactTripCount = TripCount;
+ assert((ExactTripCount == 0 || MaxTripCount == 0) &&
+ "ExtractTripCound and MaxTripCount cannot both be non zero.");
+ unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
+ UP.Count = FullUnrollTripCount;
+ if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
- UnrolledSize = (uint64_t)(LoopSize - BEInsns) * TripCount + BEInsns;
- if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
- UnrolledSize, UnrolledSize)) {
- UP.Count = TripCount;
+ if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
+ UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+ TripCount = FullUnrollTripCount;
+ TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, TripCount, DT, *SE, TTI,
- UP.Threshold + UP.DynamicCostSavingsDiscount))
- if (canUnrollCompletely(L, UP.Threshold,
- UP.PercentDynamicCostSavedThreshold,
- UP.DynamicCostSavingsDiscount,
- Cost->UnrolledCost, Cost->RolledDynamicCost)) {
- UP.Count = TripCount;
+ L, FullUnrollTripCount, DT, *SE, TTI,
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
+ unsigned Boost =
+ getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
+ if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
+ UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+ TripCount = FullUnrollTripCount;
+ TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
}
+ }
}
}
// 4rd priority is partial unrolling.
// Try partial unroll only when TripCount could be staticaly calculated.
if (TripCount) {
- if (UP.Count == 0)
- UP.Count = TripCount;
UP.Partial |= ExplicitUnroll;
if (!UP.Partial) {
DEBUG(dbgs() << " will not try to unroll partially because "
@@ -787,12 +780,14 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.Count = 0;
return false;
}
+ if (UP.Count == 0)
+ UP.Count = TripCount;
if (UP.PartialThreshold != NoThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
- UnrolledSize = (uint64_t)(LoopSize - BEInsns) * UP.Count + BEInsns;
- if (UnrolledSize > UP.PartialThreshold)
- UP.Count = (std::max(UP.PartialThreshold, 3u) - BEInsns) /
- (LoopSize - BEInsns);
+ if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
+ UP.Count =
+ (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+ (LoopSize - UP.BEInsns);
if (UP.Count > UP.MaxCount)
UP.Count = UP.MaxCount;
while (UP.Count != 0 && TripCount % UP.Count != 0)
@@ -802,19 +797,18 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
// largest power-of-two factor that satisfies the threshold limit.
// As we'll create fixup loop, do the type of unrolling only if
// remainder loop is allowed.
- UP.Count = DefaultUnrollRuntimeCount;
- UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
- while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
+ UP.Count = UP.DefaultUnrollRuntimeCount;
+ while (UP.Count != 0 &&
+ getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
UP.Count >>= 1;
- UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
- }
}
if (UP.Count < 2) {
if (PragmaEnableUnroll)
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to unroll loop as directed by unroll(enable) pragma "
- "because unrolled size is too large.");
+ ORE->emit(
+ OptimizationRemarkMissed(DEBUG_TYPE, "UnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to unroll loop as directed by unroll(enable) pragma "
+ "because unrolled size is too large.");
UP.Count = 0;
}
} else {
@@ -822,26 +816,48 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
}
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
UP.Count != TripCount)
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll pragma because "
- "unrolled size is too large.");
+ ORE->emit(
+ OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll pragma because "
+ "unrolled size is too large.");
return ExplicitUnroll;
}
assert(TripCount == 0 &&
"All cases when TripCount is constant should be covered here.");
if (PragmaFullUnroll)
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because loop has a runtime trip count.");
+ ORE->emit(
+ OptimizationRemarkMissed(DEBUG_TYPE,
+ "CantFullUnrollAsDirectedRuntimeTripCount",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll(full) pragma "
+ "because loop has a runtime trip count.");
+
+ // 5th priority is loop peeling
+ computePeelCount(L, LoopSize, UP);
+ if (UP.PeelCount) {
+ UP.Runtime = false;
+ UP.Count = 1;
+ return ExplicitUnroll;
+ }
- // 5th priority is runtime unrolling.
+ // 6th priority is runtime unrolling.
// Don't unroll a runtime trip count loop when it is disabled.
if (HasRuntimeUnrollDisablePragma(L)) {
UP.Count = 0;
return false;
}
+
+ // Check if the runtime trip count is too small when profile is available.
+ if (L->getHeader()->getParent()->getEntryCount()) {
+ if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
+ if (*ProfileTripCount < FlatLoopTripCountThreshold)
+ return false;
+ else
+ UP.AllowExpensiveTripCount = true;
+ }
+ }
+
// Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
@@ -851,15 +867,13 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
return false;
}
if (UP.Count == 0)
- UP.Count = DefaultUnrollRuntimeCount;
- UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+ UP.Count = UP.DefaultUnrollRuntimeCount;
// Reduce unroll count to be the largest power-of-two factor of
// the original count which satisfies the threshold limit.
- while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
+ while (UP.Count != 0 &&
+ getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
UP.Count >>= 1;
- UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
- }
#ifndef NDEBUG
unsigned OrigCount = UP.Count;
@@ -874,16 +888,19 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
"multiple, "
<< TripMultiple << ". Reducing unroll count from "
<< OrigCount << " to " << UP.Count << ".\n");
+ using namespace ore;
if (PragmaCount > 0 && !UP.AllowRemainder)
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- Twine("Unable to unroll loop the number of times directed by "
- "unroll_count pragma because remainder loop is restricted "
- "(that could architecture specific or because the loop "
- "contains a convergent instruction) and so must have an unroll "
- "count that divides the loop trip multiple of ") +
- Twine(TripMultiple) + ". Unrolling instead " + Twine(UP.Count) +
- " time(s).");
+ ORE->emit(
+ OptimizationRemarkMissed(DEBUG_TYPE,
+ "DifferentUnrollCountFromDirected",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to unroll loop the number of times directed by "
+ "unroll_count pragma because remainder loop is restricted "
+ "(that could architecture specific or because the loop "
+ "contains a convergent instruction) and so must have an unroll "
+ "count that divides the loop trip multiple of "
+ << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
+ << NV("UnrollCount", UP.Count) << " time(s).");
}
if (UP.Count > UP.MaxCount)
@@ -896,22 +913,34 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE, const TargetTransformInfo &TTI,
- AssumptionCache &AC, bool PreserveLCSSA,
+ AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
+ bool PreserveLCSSA,
Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold,
Optional<bool> ProvidedAllowPartial,
- Optional<bool> ProvidedRuntime) {
+ Optional<bool> ProvidedRuntime,
+ Optional<bool> ProvidedUpperBound) {
DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
<< "] Loop %" << L->getHeader()->getName() << "\n");
- if (HasUnrollDisablePragma(L)) {
+ if (HasUnrollDisablePragma(L))
+ return false;
+ if (!L->isLoopSimplifyForm()) {
+ DEBUG(
+ dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
return false;
}
unsigned NumInlineCandidates;
bool NotDuplicatable;
bool Convergent;
+ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
+ L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ ProvidedRuntime, ProvidedUpperBound);
+ // Exit early if unrolling is disabled.
+ if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
+ return false;
unsigned LoopSize = ApproximateLoopSize(
- L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
+ L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (NotDuplicatable) {
DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
@@ -922,14 +951,10 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return false;
}
- if (!L->isLoopSimplifyForm()) {
- DEBUG(
- dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
- return false;
- }
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
+ unsigned MaxTripCount = 0;
unsigned TripMultiple = 1;
// If there are multiple exiting blocks but one of them is the latch, use the
// latch for the trip count estimation. Otherwise insist on a single exiting
@@ -942,10 +967,6 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
- TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
- ProvidedRuntime);
-
// If the loop contains a convergent operation, the prelude we'd add
// to do the first few instructions before we hit the unrolled loop
// is unsafe -- it adds a control-flow dependency to the convergent
@@ -961,8 +982,31 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
if (Convergent)
UP.AllowRemainder = false;
- bool IsCountSetExplicitly = computeUnrollCount(L, TTI, DT, LI, SE, TripCount,
- TripMultiple, LoopSize, UP);
+ // Try to find the trip count upper bound if we cannot find the exact trip
+ // count.
+ bool MaxOrZero = false;
+ if (!TripCount) {
+ MaxTripCount = SE->getSmallConstantMaxTripCount(L);
+ MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+ // We can unroll by the upper bound amount if it's generally allowed or if
+ // we know that the loop is executed either the upper bound or zero times.
+ // (MaxOrZero unrolling keeps only the first loop test, so the number of
+ // loop tests remains the same compared to the non-unrolled version, whereas
+ // the generic upper bound unrolling keeps all but the last loop test so the
+ // number of loop tests goes up which may end up being worse on targets with
+ // constriained branch predictor resources so is controlled by an option.)
+ // In addition we only unroll small upper bounds.
+ if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
+ MaxTripCount = 0;
+ }
+ }
+
+ // computeUnrollCount() decides whether it is beneficial to use upper bound to
+ // fully unroll the loop.
+ bool UseUpperBound = false;
+ bool IsCountSetExplicitly =
+ computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount,
+ TripMultiple, LoopSize, UP, UseUpperBound);
if (!UP.Count)
return false;
// Unroll factor (Count) must be less or equal to TripCount.
@@ -971,14 +1015,18 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// Unroll the loop.
if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
- UP.AllowExpensiveTripCount, TripMultiple, LI, SE, &DT, &AC,
+ UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
+ TripMultiple, UP.PeelCount, LI, SE, &DT, &AC, &ORE,
PreserveLCSSA))
return false;
// If loop has an unroll count pragma or unrolled by explicitly set count
// mark loop as unrolled to prevent unrolling beyond that requested.
- if (IsCountSetExplicitly)
+ // If the loop was peeled, we already "used up" the profile information
+ // we had, so we don't want to unroll or peel again.
+ if (IsCountSetExplicitly || UP.PeelCount)
SetLoopAlreadyUnrolled(L);
+
return true;
}
@@ -988,10 +1036,11 @@ public:
static char ID; // Pass ID, replacement for typeid
LoopUnroll(Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
- Optional<bool> AllowPartial = None, Optional<bool> Runtime = None)
+ Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
+ Optional<bool> UpperBound = None)
: LoopPass(ID), ProvidedCount(std::move(Count)),
ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
- ProvidedRuntime(Runtime) {
+ ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -999,6 +1048,7 @@ public:
Optional<unsigned> ProvidedThreshold;
Optional<bool> ProvidedAllowPartial;
Optional<bool> ProvidedRuntime;
+ Optional<bool> ProvidedUpperBound;
bool runOnLoop(Loop *L, LPPassManager &) override {
if (skipLoop(L))
@@ -1012,11 +1062,16 @@ public:
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
+ // pass. Function analyses need to be preserved across loop transformations
+ // but ORE cannot be preserved (see comment before the pass definition).
+ OptimizationRemarkEmitter ORE(&F);
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
- return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount,
- ProvidedThreshold, ProvidedAllowPartial,
- ProvidedRuntime);
+ return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
+ ProvidedCount, ProvidedThreshold,
+ ProvidedAllowPartial, ProvidedRuntime,
+ ProvidedUpperBound);
}
/// This transformation requires natural loop information & requires that
@@ -1040,7 +1095,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
- int Runtime) {
+ int Runtime, int UpperBound) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
@@ -1048,9 +1103,33 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
Count == -1 ? None : Optional<unsigned>(Count),
AllowPartial == -1 ? None
: Optional<bool>(AllowPartial),
- Runtime == -1 ? None : Optional<bool>(Runtime));
+ Runtime == -1 ? None : Optional<bool>(Runtime),
+ UpperBound == -1 ? None : Optional<bool>(UpperBound));
}
Pass *llvm::createSimpleLoopUnrollPass() {
- return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+ return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
+}
+
+PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &) {
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
+ // FIXME: This should probably be optional rather than required.
+ if (!ORE)
+ report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
+ "cached at a higher level");
+
+ bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
+ /*PreserveLCSSA*/ true, ProvidedCount,
+ ProvidedThreshold, ProvidedAllowPartial,
+ ProvidedRuntime, ProvidedUpperBound);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ return getLoopPassPreservedAnalyses();
}
OpenPOWER on IntegriCloud