summary | refs | log | tree | commit | diff | stats
path: root/contrib/llvm/lib/Analysis/InlineCost.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Analysis/InlineCost.cpp')
-rw-r--r-- contrib/llvm/lib/Analysis/InlineCost.cpp 279
1 files changed, 166 insertions, 113 deletions
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
index dcb724a..4109049 100644
--- a/contrib/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -40,18 +40,7 @@ using namespace llvm;
STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
-// Threshold to use when optsize is specified (and there is no
-// -inline-threshold).
-const int OptSizeThreshold = 75;
-
-// Threshold to use when -Oz is specified (and there is no -inline-threshold).
-const int OptMinSizeThreshold = 25;
-
-// Threshold to use when -O[34] is specified (and there is no
-// -inline-threshold).
-const int OptAggressiveThreshold = 275;
-
-static cl::opt<int> DefaultInlineThreshold(
+static cl::opt<int> InlineThreshold(
"inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -66,6 +55,11 @@ static cl::opt<int> ColdThreshold(
"inlinecold-threshold", cl::Hidden, cl::init(225),
cl::desc("Threshold for inlining functions with cold attribute"));
+static cl::opt<int>
+ HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
+ cl::ZeroOrMore,
+ cl::desc("Threshold for hot callsites "));
+
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -75,20 +69,23 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
- /// The cache of @llvm.assume intrinsics.
- AssumptionCacheTracker *ACT;
+ /// Getter for the cache of @llvm.assume intrinsics.
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
/// Profile summary information.
ProfileSummaryInfo *PSI;
- // The called function.
+ /// The called function.
Function &F;
- // The candidate callsite being analyzed. Please do not use this to do
- // analysis in the caller function; we want the inline cost query to be
- // easily cacheable. Instead, use the cover function paramHasAttr.
+ /// The candidate callsite being analyzed. Please do not use this to do
+ /// analysis in the caller function; we want the inline cost query to be
+ /// easily cacheable. Instead, use the cover function paramHasAttr.
CallSite CandidateCS;
+ /// Tunable parameters that control the analysis.
+ const InlineParams &Params;
+
int Threshold;
int Cost;
@@ -107,25 +104,25 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
int FiftyPercentVectorBonus, TenPercentVectorBonus;
int VectorBonus;
- // While we walk the potentially-inlined instructions, we build up and
- // maintain a mapping of simplified values specific to this callsite. The
- // idea is to propagate any special information we have about arguments to
- // this call through the inlinable section of the function, and account for
- // likely simplifications post-inlining. The most important aspect we track
- // is CFG altering simplifications -- when we prove a basic block dead, that
- // can cause dramatic shifts in the cost of inlining a function.
+ /// While we walk the potentially-inlined instructions, we build up and
+ /// maintain a mapping of simplified values specific to this callsite. The
+ /// idea is to propagate any special information we have about arguments to
+ /// this call through the inlinable section of the function, and account for
+ /// likely simplifications post-inlining. The most important aspect we track
+ /// is CFG altering simplifications -- when we prove a basic block dead, that
+ /// can cause dramatic shifts in the cost of inlining a function.
DenseMap<Value *, Constant *> SimplifiedValues;
- // Keep track of the values which map back (through function arguments) to
- // allocas on the caller stack which could be simplified through SROA.
+ /// Keep track of the values which map back (through function arguments) to
+ /// allocas on the caller stack which could be simplified through SROA.
DenseMap<Value *, Value *> SROAArgValues;
- // The mapping of caller Alloca values to their accumulated cost savings. If
- // we have to disable SROA for one of the allocas, this tells us how much
- // cost must be added.
+ /// The mapping of caller Alloca values to their accumulated cost savings. If
+ /// we have to disable SROA for one of the allocas, this tells us how much
+ /// cost must be added.
DenseMap<Value *, int> SROAArgCosts;
- // Keep track of values which map to a pointer base and constant offset.
+ /// Keep track of values which map to a pointer base and constant offset.
DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
// Custom simplification helper routines.
@@ -203,20 +200,21 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitUnreachableInst(UnreachableInst &I);
public:
- CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI, Function &Callee, int Threshold,
- CallSite CSArg)
- : TTI(TTI), ACT(ACT), PSI(PSI), F(Callee), CandidateCS(CSArg),
- Threshold(Threshold), Cost(0), IsCallerRecursive(false),
- IsRecursiveCall(false), ExposesReturnsTwice(false),
- HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
- HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),
- AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
- FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
+ CallAnalyzer(const TargetTransformInfo &TTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
+ const InlineParams &Params)
+ : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee),
+ CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
+ Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
+ ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -320,7 +318,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
continue;
// Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
@@ -620,42 +618,44 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
}
Function *Caller = CS.getCaller();
- if (DefaultInlineThreshold.getNumOccurrences() > 0) {
- // Explicitly specified -inline-threhold overrides the threshold passed to
- // CallAnalyzer's constructor.
- Threshold = DefaultInlineThreshold;
- } else {
- // If -inline-threshold is not given, listen to the optsize and minsize
- // attributes when they would decrease the threshold.
- if (Caller->optForMinSize() && OptMinSizeThreshold < Threshold)
- Threshold = OptMinSizeThreshold;
- else if (Caller->optForSize() && OptSizeThreshold < Threshold)
- Threshold = OptSizeThreshold;
- }
- bool HotCallsite = false;
- uint64_t TotalWeight;
- if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
- PSI->isHotCount(TotalWeight))
- HotCallsite = true;
-
- // Listen to the inlinehint attribute or profile based hotness information
- // when it would increase the threshold and the caller does not need to
- // minimize its size.
- bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
- PSI->isHotFunction(&Callee) ||
- HotCallsite;
- if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
- Threshold = HintThreshold;
-
- bool ColdCallee = PSI->isColdFunction(&Callee);
- // Command line argument for DefaultInlineThreshold will override the default
- // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
- // do not use the default cold threshold even if it is smaller.
- if ((DefaultInlineThreshold.getNumOccurrences() == 0 ||
- ColdThreshold.getNumOccurrences() > 0) &&
- ColdCallee && ColdThreshold < Threshold)
- Threshold = ColdThreshold;
+ // Return min(A, B) if B is valid; otherwise return A unchanged.
+ auto MinIfValid = [](int A, Optional<int> B) {
+ return B ? std::min(A, B.getValue()) : A;
+ };
+
+ // Return max(A, B) if B is valid; otherwise return A unchanged.
+ auto MaxIfValid = [](int A, Optional<int> B) {
+ return B ? std::max(A, B.getValue()) : A;
+ };
+
+ // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
+ // and reduce the threshold if the caller has the necessary attribute.
+ if (Caller->optForMinSize())
+ Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
+ else if (Caller->optForSize())
+ Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
+
+ // Adjust the threshold based on inlinehint attribute and profile based
+ // hotness information if the caller does not have MinSize attribute.
+ if (!Caller->optForMinSize()) {
+ if (Callee.hasFnAttribute(Attribute::InlineHint))
+ Threshold = MaxIfValid(Threshold, Params.HintThreshold);
+ if (PSI) {
+ uint64_t TotalWeight;
+ if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
+ PSI->isHotCount(TotalWeight)) {
+ Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold);
+ } else if (PSI->isFunctionEntryHot(&Callee)) {
+ // If callsite hotness can not be determined, we may still know
+ // that the callee is hot and treat it as a weaker hint for threshold
+ // increase.
+ Threshold = MaxIfValid(Threshold, Params.HintThreshold);
+ } else if (PSI->isFunctionEntryCold(&Callee)) {
+ Threshold = MinIfValid(Threshold, Params.ColdThreshold);
+ }
+ }
+ }
// Finally, take the target-specific inlining threshold multiplier into
// account.
@@ -957,8 +957,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(TTI, ACT, PSI, *F, InlineConstants::IndirectCallThreshold,
- CS);
+ auto IndirectCallParams = Params;
+ IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
+ CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
@@ -1251,13 +1252,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Cost -= InlineConstants::InstrCost;
}
}
-
+ // The call instruction also disappears after inlining.
+ Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty;
+
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically.
bool OnlyOneCallAndLocalLinkage =
F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
if (OnlyOneCallAndLocalLinkage)
- Cost += InlineConstants::LastCallToStaticBonus;
+ Cost -= InlineConstants::LastCallToStaticBonus;
// If this function uses the coldcc calling convention, prefer not to inline
// it.
@@ -1312,8 +1315,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// the ephemeral values multiple times (and they're completely determined by
// the callee, so this is purely duplicate work).
SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F),
- EphValues);
+ CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
@@ -1444,32 +1446,19 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
-InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
- TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI) {
- return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
- ACT, PSI);
-}
-
-int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
- unsigned SizeOptLevel) {
- if (OptLevel > 2)
- return OptAggressiveThreshold;
- if (SizeOptLevel == 1) // -Os
- return OptSizeThreshold;
- if (SizeOptLevel == 2) // -Oz
- return OptMinSizeThreshold;
- return DefaultInlineThreshold;
+InlineCost llvm::getInlineCost(
+ CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI) {
+ return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
+ GetAssumptionCache, PSI);
}
-int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
-
-InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
- int DefaultThreshold,
- TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI) {
+InlineCost llvm::getInlineCost(
+ CallSite CS, Function *Callee, const InlineParams &Params,
+ TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI) {
// Cannot inline indirect calls.
if (!Callee)
@@ -1494,7 +1483,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
// Don't inline functions which can be interposed at link-time. Don't inline
// functions marked noinline or call sites marked noinline.
- // Note: inlining non-exact non-interposable fucntions is fine, since we know
+ // Note: inlining non-exact non-interposable functions is fine, since we know
// we have *a* correct implementation of the source level function.
if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
CS.isNoInline())
@@ -1503,7 +1492,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(CalleeTTI, ACT, PSI, *Callee, DefaultThreshold, CS);
+ CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1551,3 +1540,67 @@ bool llvm::isInlineViable(Function &F) {
return true;
}
+
+// APIs to create InlineParams based on command line flags and/or other
+// parameters.
+
+InlineParams llvm::getInlineParams(int Threshold) {
+ InlineParams Params;
+
+ // This field is the threshold to use for a callee by default. This is
+ // derived from one or more of:
+ // * optimization or size-optimization levels,
+ // * a value passed to createFunctionInliningPass function, or
+ // * the -inline-threshold flag.
+ // If the -inline-threshold flag is explicitly specified, that is used
+ // irrespective of anything else.
+ if (InlineThreshold.getNumOccurrences() > 0)
+ Params.DefaultThreshold = InlineThreshold;
+ else
+ Params.DefaultThreshold = Threshold;
+
+ // Set the HintThreshold knob from the -inlinehint-threshold.
+ Params.HintThreshold = HintThreshold;
+
+ // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
+ Params.HotCallSiteThreshold = HotCallSiteThreshold;
+
+ // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
+ // -inline-threshold commandline option is not explicitly given. If that
+ // option is present, then its value applies even for callers with size and
+ // minsize attributes.
+ // If the -inline-threshold is not specified, set the ColdThreshold from the
+ // -inlinecold-threshold even if it is not explicitly passed. If
+ // -inline-threshold is specified, then -inlinecold-threshold needs to be
+ // explicitly specified to set the ColdThreshold knob.
+ if (InlineThreshold.getNumOccurrences() == 0) {
+ Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
+ Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
+ Params.ColdThreshold = ColdThreshold;
+ } else if (ColdThreshold.getNumOccurrences() > 0) {
+ Params.ColdThreshold = ColdThreshold;
+ }
+ return Params;
+}
+
+InlineParams llvm::getInlineParams() {
+ return getInlineParams(InlineThreshold);
+}
+
+// Compute the default threshold for inlining based on the opt level and the
+// size opt level.
+static int computeThresholdFromOptLevels(unsigned OptLevel,
+ unsigned SizeOptLevel) {
+ if (OptLevel > 2)
+ return InlineConstants::OptAggressiveThreshold;
+ if (SizeOptLevel == 1) // -Os
+ return InlineConstants::OptSizeThreshold;
+ if (SizeOptLevel == 2) // -Oz
+ return InlineConstants::OptMinSizeThreshold;
+ return InlineThreshold;
+}
+
+InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
+ return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+}
OpenPOWER on IntegriCloud