summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/IPO
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO')
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp128
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp166
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp83
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp54
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp121
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp1873
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp433
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp114
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp629
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp937
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp27
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp25
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp150
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Internalize.cpp103
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp20
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp589
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp560
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp125
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp31
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp1265
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp62
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp12
26 files changed, 5376 insertions, 2172 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 4762011..0e05129 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -34,8 +34,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
@@ -63,7 +66,8 @@ namespace {
///
struct ArgPromotion : public CallGraphSCCPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -81,7 +85,8 @@ namespace {
bool isDenselyPacked(Type *type, const DataLayout &DL);
bool canPaddingBeAccessed(Argument *Arg);
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
- bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
+ bool isSafeToPromoteArgument(Argument *Arg, bool isByVal,
+ AAResults &AAR) const;
CallGraphNode *DoPromotion(Function *F,
SmallPtrSetImpl<Argument*> &ArgsToPromote,
SmallPtrSetImpl<Argument*> &ByValArgsToTransform);
@@ -90,15 +95,15 @@ namespace {
bool doInitialization(CallGraph &CG) override;
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements;
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
};
}
char ArgPromotion::ID = 0;
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
@@ -217,9 +222,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument*, 16> PointerArgs;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- if (I->getType()->isPointerTy())
- PointerArgs.push_back(I);
+ for (Argument &I : F->args())
+ if (I.getType()->isPointerTy())
+ PointerArgs.push_back(&I);
if (PointerArgs.empty()) return nullptr;
// Second check: make sure that all callers are direct callers. We can't
@@ -237,6 +242,14 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
const DataLayout &DL = F->getParent()->getDataLayout();
+ // We need to manually construct BasicAA directly in order to disable its use
+ // of other function analyses.
+ BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F));
+
+ // Construct our own AA results for this function. We do this manually to
+ // work around the limitations of the legacy pass manager.
+ AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));
+
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
SmallPtrSet<Argument*, 8> ArgsToPromote;
@@ -281,8 +294,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// If all the elements are single-value types, we can promote it.
bool AllSimple = true;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- if (!STy->getElementType(i)->isSingleValueType()) {
+ for (const auto *EltTy : STy->elements()) {
+ if (!EltTy->isSingleValueType()) {
AllSimple = false;
break;
}
@@ -303,8 +316,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
if (isSelfRecursive) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
bool RecursiveType = false;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- if (STy->getElementType(i) == PtrArg->getType()) {
+ for (const auto *EltTy : STy->elements()) {
+ if (EltTy == PtrArg->getType()) {
RecursiveType = true;
break;
}
@@ -315,7 +328,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
}
// Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr()))
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
ArgsToPromote.insert(PtrArg);
}
@@ -416,7 +429,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
- bool isByValOrInAlloca) const {
+ bool isByValOrInAlloca,
+ AAResults &AAR) const {
typedef std::set<IndicesVector> GEPIndicesSet;
// Quick exit for unused arguments
@@ -453,12 +467,11 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// First, iterate the entry block and mark loads of (geps of) arguments as
// safe.
- BasicBlock *EntryBlock = Arg->getParent()->begin();
+ BasicBlock &EntryBlock = Arg->getParent()->front();
// Declare this here so we can reuse it
IndicesVector Indices;
- for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
- I != E; ++I)
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ for (Instruction &I : EntryBlock)
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *V = LI->getPointerOperand();
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
V = GEP->getPointerOperand();
@@ -501,12 +514,11 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
if (GEP->use_empty()) {
// Dead GEP's cause trouble later. Just remove them if we run into
// them.
- getAnalysis<AliasAnalysis>().deleteValue(GEP);
GEP->eraseFromParent();
// TODO: This runs the above loop over and over again for dead GEPs
// Couldn't we just do increment the UI iterator earlier and erase the
// use?
- return isSafeToPromoteArgument(Arg, isByValOrInAlloca);
+ return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR);
}
// Ensure that all of the indices are constants.
@@ -563,8 +575,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// blocks we know to be transparent to the load.
SmallPtrSet<BasicBlock*, 16> TranspBlocks;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
// Check to see if the load is invalidated from the start of the block to
// the load itself.
@@ -572,8 +582,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
BasicBlock *BB = Load->getParent();
MemoryLocation Loc = MemoryLocation::get(Load);
- if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc,
- AliasAnalysis::Mod))
+ if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
return false; // Pointer is invalidated!
// Now check every path from the entry block to the load for transparency.
@@ -581,7 +590,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// loading block.
for (BasicBlock *P : predecessors(BB)) {
for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
- if (AA.canBasicBlockModify(*TranspBB, Loc))
+ if (AAR.canBasicBlockModify(*TranspBB, Loc))
return false;
}
}
@@ -637,13 +646,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
unsigned ArgIndex = 1;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++ArgIndex) {
- if (ByValArgsToTransform.count(I)) {
+ if (ByValArgsToTransform.count(&*I)) {
// Simple byval argument? Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
Params.insert(Params.end(), STy->element_begin(), STy->element_end());
++NumByValArgsPromoted;
- } else if (!ArgsToPromote.count(I)) {
+ } else if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
@@ -661,7 +670,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// In this table, we will track which indices are loaded from the argument
// (where direct loads are tracked as no indices).
- ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
for (User *U : I->users()) {
Instruction *UI = cast<Instruction>(U);
Type *SrcTy;
@@ -687,7 +696,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
else
// Take any load, we will use it only to update Alias Analysis
OrigLoad = cast<LoadInst>(UI->user_back());
- OriginalLoads[std::make_pair(I, Indices)] = OrigLoad;
+ OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
}
// Add a parameter to the function for each element passed in.
@@ -722,15 +731,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
NF->copyAttributesFrom(F);
// Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(F);
- if (DI != FunctionDIs.end()) {
- DISubprogram *SP = DI->second;
- SP->replaceFunction(NF);
- // Ensure the map is updated so it can be reused on subsequent argument
- // promotions of the same function.
- FunctionDIs.erase(DI);
- FunctionDIs[NF] = SP;
- }
+ NF->setSubprogram(F->getSubprogram());
+ F->setSubprogram(nullptr);
DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
<< "From: " << *F);
@@ -740,13 +742,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
AttributesVec.clear();
- F->getParent()->getFunctionList().insert(F, NF);
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
- // Get the alias analysis information that we need to update to reflect our
- // changes.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
// Get the callgraph information that we need to update to reflect our
// changes.
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
@@ -775,7 +773,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ArgIndex = 1;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I, ++AI, ++ArgIndex)
- if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
Args.push_back(*AI); // Unmodified argument
if (CallPAL.hasAttributes(ArgIndex)) {
@@ -783,7 +781,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
AttributesVec.
push_back(AttributeSet::get(F->getContext(), Args.size(), B));
}
- } else if (ByValArgsToTransform.count(I)) {
+ } else if (ByValArgsToTransform.count(&*I)) {
// Emit a GEP and load for each element of the struct.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
@@ -798,14 +796,14 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
} else if (!I->use_empty()) {
// Non-dead argument: insert GEPs and loads as appropriate.
- ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
// Store the Value* version of the indices in here, but declare it now
// for reuse.
std::vector<Value*> Ops;
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)];
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)];
if (!SI->second.empty()) {
Ops.reserve(SI->second.size());
Type *ElTy = V->getType();
@@ -873,10 +871,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Args.clear();
AttributesVec.clear();
- // Update the alias analysis implementation to know that we are replacing
- // the old call with a new one.
- AA.replaceWithNewValue(Call, New);
-
// Update the callgraph to know that the callsite has been transformed.
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
@@ -901,20 +895,19 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
//
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
I2 = NF->arg_begin(); I != E; ++I) {
- if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
// If this is an unmodified argument, move the name and users over to the
// new version.
- I->replaceAllUsesWith(I2);
- I2->takeName(I);
- AA.replaceWithNewValue(I, I2);
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
++I2;
continue;
}
- if (ByValArgsToTransform.count(I)) {
+ if (ByValArgsToTransform.count(&*I)) {
// In the callee, we create an alloca, and store each of the new incoming
// arguments into the alloca.
- Instruction *InsertPt = NF->begin()->begin();
+ Instruction *InsertPt = &NF->begin()->front();
// Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
@@ -929,13 +922,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
InsertPt);
I2->setName(I->getName()+"."+Twine(i));
- new StoreInst(I2++, Idx, InsertPt);
+ new StoreInst(&*I2++, Idx, InsertPt);
}
// Anything that used the arg should now use the alloca.
I->replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(I);
- AA.replaceWithNewValue(I, TheAlloca);
+ TheAlloca->takeName(&*I);
// If the alloca is used in a call, we must clear the tail flag since
// the callee now uses an alloca from the caller.
@@ -948,23 +940,20 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
continue;
}
- if (I->use_empty()) {
- AA.deleteValue(I);
+ if (I->use_empty())
continue;
- }
// Otherwise, if we promoted this argument, then all users are load
// instructions (or GEPs with only load users), and all loads should be
// using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
while (!I->use_empty()) {
if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
assert(ArgIndices.begin()->second.empty() &&
"Load element should sort to front!");
I2->setName(I->getName()+".val");
- LI->replaceAllUsesWith(I2);
- AA.replaceWithNewValue(LI, I2);
+ LI->replaceAllUsesWith(&*I2);
LI->eraseFromParent();
DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
<< "' in function '" << F->getName() << "'\n");
@@ -1000,11 +989,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// the argument specified by ArgNo.
while (!GEP->use_empty()) {
LoadInst *L = cast<LoadInst>(GEP->user_back());
- L->replaceAllUsesWith(TheArg);
- AA.replaceWithNewValue(L, TheArg);
+ L->replaceAllUsesWith(&*TheArg);
L->eraseFromParent();
}
- AA.deleteValue(GEP);
GEP->eraseFromParent();
}
}
@@ -1013,10 +1000,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
std::advance(I2, ArgIndices.size());
}
- // Tell the alias analysis that the old function is about to disappear.
- AA.replaceWithNewValue(F, NF);
-
-
NF_CGN->stealCalledFunctionsFrom(CG[F]);
// Now that the old function is dead, delete it. If there is a dangling
@@ -1032,6 +1015,5 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
bool ArgPromotion::doInitialization(CallGraph &CG) {
- FunctionDIs = makeSubprogramMap(CG.getModule());
return CallGraphSCCPass::doInitialization(CG);
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 8ce7646..0aa49d6 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -119,7 +119,7 @@ bool ConstantMerge::runOnModule(Module &M) {
// First: Find the canonical constants others will be merged with.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
- GlobalVariable *GV = GVI++;
+ GlobalVariable *GV = &*GVI++;
// If this GV is dead, remove it.
GV->removeDeadConstantUsers();
@@ -160,7 +160,7 @@ bool ConstantMerge::runOnModule(Module &M) {
// invalidating the Constant* pointers in CMap.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
- GlobalVariable *GV = GVI++;
+ GlobalVariable *GV = &*GVI++;
// Only process constants with initializers in the default address space.
if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
diff --git a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
new file mode 100644
index 0000000..5bbb751
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -0,0 +1,166 @@
+//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass exports all llvm.bitset's found in the module in the form of a
+// __cfi_check function, which can be used to verify cross-DSO call targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cross-dso-cfi"
+
+STATISTIC(TypeIds, "Number of unique type identifiers");
+
+namespace {
+
+/// Module pass that builds a __cfi_check function from the i64 type
+/// identifiers found in the module's "llvm.bitsets" named metadata.
+struct CrossDSOCFI : public ModulePass {
+  static char ID;
+  CrossDSOCFI() : ModulePass(ID) {
+    initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry());
+  }
+
+  // Both pointers are assigned in doInitialization(). They start out null so
+  // that a use before initialization does not read an indeterminate value.
+  Module *M = nullptr;
+  MDNode *VeryLikelyWeights = nullptr;
+
+  ConstantInt *extractBitSetTypeId(MDNode *MD);
+  void buildCFICheck();
+
+  bool doInitialization(Module &M) override;
+  bool runOnModule(Module &M) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS_BEGIN(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false,
+ false)
+INITIALIZE_PASS_END(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, false)
+char CrossDSOCFI::ID = 0;
+
+/// Returns a newly heap-allocated CrossDSOCFI pass instance.
+ModulePass *llvm::createCrossDSOCFIPass() {
+  return new CrossDSOCFI();
+}
+
+/// Remembers the module being processed and precomputes the branch-weight
+/// metadata later attached to the bitset-test branches. Always returns false.
+bool CrossDSOCFI::doInitialization(Module &Mod) {
+  M = &Mod;
+
+  // Weights of (2^20 - 1) : 1 between the first and second branch target.
+  MDBuilder WeightBuilder(Mod.getContext());
+  VeryLikelyWeights = WeightBuilder.createBranchWeights((1U << 20) - 1, 1);
+
+  return false;
+}
+
+/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode.
+///
+/// Returns the node's first operand when it is a 64-bit ConstantInt, and
+/// nullptr otherwise (null operand, non-value metadata, non-integer constant,
+/// or an integer of a different width).
+ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) {
+  // This check excludes vtables for classes inside anonymous namespaces.
+  // A null operand is rejected here as well: plain dyn_cast must not be given
+  // a null pointer, so use the null-tolerant form, matching operand 1 below.
+  auto TM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(0));
+  if (!TM)
+    return nullptr;
+  auto C = dyn_cast_or_null<ConstantInt>(TM->getValue());
+  if (!C)
+    return nullptr;
+  // We are looking for i64 constants.
+  if (C->getBitWidth() != 64)
+    return nullptr;
+
+  // Sanity check.
+  auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1));
+  // Can be null if a function was removed by an optimization.
+  if (FM) {
+    auto F = dyn_cast<Function>(FM->getValue());
+    // But can never be a function declaration.
+    assert(!F || !F->isDeclaration());
+    (void)F; // Suppress unused variable warning in the no-asserts build.
+  }
+  return C;
+}
+
+/// buildCFICheck - emits __cfi_check for the current module.
+///
+/// The function takes (i64 CallSiteTypeId, i8* Addr) and switches on
+/// CallSiteTypeId. Every i64 type id collected from the "llvm.bitsets" named
+/// metadata gets its own "test" block, which calls the bitset_test intrinsic
+/// on Addr and branches to "exit" when the test holds and to "trap" when it
+/// does not. Type ids without a case reach "trap" via the switch default.
+void CrossDSOCFI::buildCFICheck() {
+  // FIXME: verify that __cfi_check ends up near the end of the code section,
+  // but before the jump slots created in LowerBitSets.
+  llvm::DenseSet<uint64_t> BitSetIds;
+  NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets");
+
+  // Collect the distinct numeric type ids; nodes that do not carry an i64
+  // constant are skipped by extractBitSetTypeId.
+  if (BitSetNM)
+    for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I)
+      if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I)))
+        BitSetIds.insert(TypeId->getZExtValue());
+
+  LLVMContext &Ctx = M->getContext();
+  Constant *C = M->getOrInsertFunction(
+      "__cfi_check",
+      FunctionType::get(
+          Type::getVoidTy(Ctx),
+          {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))},
+          false));
+  // getOrInsertFunction returns a Constant. Use cast<> rather than dyn_cast<>
+  // because the result is dereferenced unconditionally: if it is ever not a
+  // Function, this asserts instead of silently dereferencing a null pointer.
+  Function *F = cast<Function>(C);
+  F->setAlignment(4096);
+  auto args = F->arg_begin();
+  Argument &CallSiteTypeId = *(args++);
+  CallSiteTypeId.setName("CallSiteTypeId");
+  Argument &Addr = *(args++);
+  Addr.setName("Addr");
+  assert(args == F->arg_end());
+
+  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
+
+  // "trap": call the trap intrinsic, then mark the block end unreachable.
+  BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F);
+  IRBuilder<> IRBTrap(TrapBB);
+  Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap);
+  llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn);
+  TrapCall->setDoesNotReturn();
+  TrapCall->setDoesNotThrow();
+  IRBTrap.CreateUnreachable();
+
+  // "exit": plain return, reached when a bitset test succeeds.
+  BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F);
+  IRBuilder<> IRBExit(ExitBB);
+  IRBExit.CreateRetVoid();
+
+  // "entry": dispatch on the call-site type id, defaulting to the trap block.
+  IRBuilder<> IRB(BB);
+  SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size());
+  for (uint64_t TypeId : BitSetIds) {
+    ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId);
+    BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F);
+    IRBuilder<> IRBTest(TestBB);
+    // Looked up inside the loop on purpose: a module without any type ids
+    // then never gets a declaration of the intrinsic.
+    Function *BitsetTestFn =
+        Intrinsic::getDeclaration(M, Intrinsic::bitset_test);
+
+    Value *Test = IRBTest.CreateCall(
+        BitsetTestFn, {&Addr, MetadataAsValue::get(
+                                  Ctx, ConstantAsMetadata::get(CaseTypeId))});
+    BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB);
+    BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights);
+
+    SI->addCase(CaseTypeId, TestBB);
+    ++TypeIds;
+  }
+}
+
+/// Emits __cfi_check when the module carries the "Cross-DSO CFI" module
+/// flag. Returns true if the function was emitted and false when the flag is
+/// absent and nothing was done.
+bool CrossDSOCFI::runOnModule(Module &M) {
+  const bool Enabled = M.getModuleFlag("Cross-DSO CFI") != nullptr;
+  if (Enabled)
+    buildCFICheck();
+  return Enabled;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index d044764..4de3d95 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -35,6 +35,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <map>
#include <set>
#include <tuple>
@@ -121,14 +122,6 @@ namespace {
typedef SmallVector<RetOrArg, 5> UseVector;
- // Map each LLVM function to corresponding metadata with debug info. If
- // the function is replaced with another one, we should patch the pointer
- // to LLVM function in metadata.
- // As the code generation for module is finished (and DIBuilder is
- // finalized) we assume that subprogram descriptors won't be changed, and
- // they are stored in map for short duration anyway.
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
-
protected:
// DAH uses this to specify a different ID.
explicit DAE(char &ID) : ModulePass(ID) {}
@@ -198,6 +191,13 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
if (Fn.hasAddressTaken())
return false;
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (Fn.hasFnAttribute(Attribute::Naked)) {
+ return false;
+ }
+
// Okay, we know we can transform this function if safe. Scan its body
// looking for calls marked musttail or calls to llvm.vastart.
for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
@@ -229,7 +229,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Create the new function body and insert it into the module...
Function *NF = Function::Create(NFTy, Fn.getLinkage());
NF->copyAttributesFrom(&Fn);
- Fn.getParent()->getFunctionList().insert(&Fn, NF);
+ Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
NF->takeName(&Fn);
// Loop over all of the callers of the function, transforming the call sites
@@ -296,20 +296,12 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
I2 = NF->arg_begin(); I != E; ++I, ++I2) {
// Move the name and users over to the new version.
- I->replaceAllUsesWith(I2);
- I2->takeName(I);
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
}
// Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(&Fn);
- if (DI != FunctionDIs.end()) {
- DISubprogram *SP = DI->second;
- SP->replaceFunction(NF);
- // Ensure the map is updated so it can be reused on non-varargs argument
- // eliminations of the same function.
- FunctionDIs.erase(DI);
- FunctionDIs[NF] = SP;
- }
+ NF->setSubprogram(Fn.getSubprogram());
// Fix up any BlockAddresses that refer to the function.
Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
@@ -345,16 +337,19 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
return false;
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (Fn.hasFnAttribute(Attribute::Naked))
+ return false;
+
if (Fn.use_empty())
return false;
SmallVector<unsigned, 8> UnusedArgs;
- for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
- I != E; ++I) {
- Argument *Arg = I;
-
- if (Arg->use_empty() && !Arg->hasByValOrInAllocaAttr())
- UnusedArgs.push_back(Arg->getArgNo());
+ for (Argument &Arg : Fn.args()) {
+ if (Arg.use_empty() && !Arg.hasByValOrInAllocaAttr())
+ UnusedArgs.push_back(Arg.getArgNo());
}
if (UnusedArgs.empty())
@@ -485,6 +480,10 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
if (F) {
// Used in a direct call.
+ // The function argument is live if it is used as a bundle operand.
+ if (CS.isBundleOperand(U))
+ return Live;
+
// Find the argument number. We know for sure that this use is an
// argument, since if it was the function argument this would be an
// indirect call and the we know can't be looking at a value of the
@@ -543,6 +542,14 @@ void DAE::SurveyFunction(const Function &F) {
return;
}
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (F.hasFnAttribute(Attribute::Naked)) {
+ MarkLive(F);
+ return;
+ }
+
unsigned RetCount = NumRetVals(&F);
// Assume all return values are dead
typedef SmallVector<Liveness, 5> RetVals;
@@ -648,7 +655,7 @@ void DAE::SurveyFunction(const Function &F) {
} else {
// See what the effect of this use is (recording any uses that cause
// MaybeLive in MaybeLiveArgUses).
- Result = SurveyUses(AI, MaybeLiveArgUses);
+ Result = SurveyUses(&*AI, MaybeLiveArgUses);
}
// Mark the result.
@@ -878,7 +885,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
NF->setAttributes(NewPAL);
// Insert the new function before the old function, so we won't be processing
// it again.
- F->getParent()->getFunctionList().insert(F, NF);
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
// Loop over all of the callers of the function, transforming the call sites
@@ -946,7 +953,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, "", Call);
+ Args, "", Call->getParent());
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
cast<InvokeInst>(New)->setAttributes(NewCallPAL);
} else {
@@ -976,9 +983,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
" must have been a struct or an array!");
Instruction *InsertPt = Call;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- BasicBlock::iterator IP = II->getNormalDest()->begin();
- while (isa<PHINode>(IP)) ++IP;
- InsertPt = IP;
+ BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest());
+ InsertPt = &*NewEdge->getFirstInsertionPt();
}
// We used to return a struct or array. Instead of doing smart stuff
@@ -1026,8 +1032,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
// If this is a live argument, move the name and users over to the new
// version.
- I->replaceAllUsesWith(I2);
- I2->takeName(I);
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
++I2;
} else {
// If this argument is dead, replace any uses of it with null constants
@@ -1079,9 +1085,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
// Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(F);
- if (DI != FunctionDIs.end())
- DI->second->replaceFunction(NF);
+ NF->setSubprogram(F->getSubprogram());
// Now that the old function is dead, delete it.
F->eraseFromParent();
@@ -1092,9 +1096,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
bool DAE::runOnModule(Module &M) {
bool Changed = false;
- // Collect debug info descriptors for functions.
- FunctionDIs = makeSubprogramMap(M);
-
// First pass: Do a simple check to see if any functions can have their "..."
// removed. We can do this if they never call va_start. This loop cannot be
// fused with the next loop, because deleting a function invalidates
@@ -1119,7 +1120,7 @@ bool DAE::runOnModule(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
// Increment now, because the function will probably get removed (ie.
// replaced by a new one).
- Function *F = I++;
+ Function *F = &*I++;
Changed |= RemoveDeadStuffFromFunction(F);
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 67ba72d..af313a6 100644
--- a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -1,4 +1,5 @@
-//===-- ElimAvailExtern.cpp - DCE unreachable internal functions ----------------===//
+//===-- ElimAvailExtern.cpp - DCE unreachable internal functions
+//----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,9 +16,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -28,18 +27,18 @@ STATISTIC(NumFunctions, "Number of functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
namespace {
- struct EliminateAvailableExternally : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- EliminateAvailableExternally() : ModulePass(ID) {
- initializeEliminateAvailableExternallyPass(
- *PassRegistry::getPassRegistry());
- }
+struct EliminateAvailableExternally : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ EliminateAvailableExternally() : ModulePass(ID) {
+ initializeEliminateAvailableExternallyPass(
+ *PassRegistry::getPassRegistry());
+ }
- // run - Do the EliminateAvailableExternally pass on the specified module,
- // optionally updating the specified callgraph to reflect the changes.
- //
- bool runOnModule(Module &M) override;
- };
+ // run - Do the EliminateAvailableExternally pass on the specified module,
+ // optionally updating the specified callgraph to reflect the changes.
+ //
+ bool runOnModule(Module &M) override;
+};
}
char EliminateAvailableExternally::ID = 0;
@@ -54,30 +53,31 @@ bool EliminateAvailableExternally::runOnModule(Module &M) {
bool Changed = false;
// Drop initializers of available externally global variables.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- if (!I->hasAvailableExternallyLinkage())
+ for (GlobalVariable &GV : M.globals()) {
+ if (!GV.hasAvailableExternallyLinkage())
continue;
- if (I->hasInitializer()) {
- Constant *Init = I->getInitializer();
- I->setInitializer(nullptr);
+ if (GV.hasInitializer()) {
+ Constant *Init = GV.getInitializer();
+ GV.setInitializer(nullptr);
if (isSafeToDestroyConstant(Init))
Init->destroyConstant();
}
- I->removeDeadConstantUsers();
- I->setLinkage(GlobalValue::ExternalLinkage);
+ GV.removeDeadConstantUsers();
+ GV.setLinkage(GlobalValue::ExternalLinkage);
NumVariables++;
+ Changed = true;
}
// Drop the bodies of available externally functions.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!I->hasAvailableExternallyLinkage())
+ for (Function &F : M) {
+ if (!F.hasAvailableExternallyLinkage())
continue;
- if (!I->isDeclaration())
+ if (!F.isDeclaration())
// This will set the linkage to external
- I->deleteBody();
- I->removeDeadConstantUsers();
+ F.deleteBody();
+ F.removeDeadConstantUsers();
NumFunctions++;
+ Changed = true;
}
return Changed;
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index b9462f2..1a3b925 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -83,7 +83,7 @@ namespace {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
bool Delete =
- deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+ deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
if (!Delete) {
if (I->hasAvailableExternallyLinkage())
continue;
@@ -103,7 +103,7 @@ namespace {
// Visit the Functions.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
bool Delete =
- deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+ deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
if (!Delete) {
if (I->hasAvailableExternallyLinkage())
continue;
@@ -124,7 +124,7 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- bool Delete = deleteStuff == (bool)Named.count(CurI);
+ bool Delete = deleteStuff == (bool)Named.count(&*CurI);
makeVisible(*CurI, Delete);
if (Delete) {
@@ -143,7 +143,7 @@ namespace {
}
CurI->replaceAllUsesWith(Declaration);
- delete CurI;
+ delete &*CurI;
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
new file mode 100644
index 0000000..816291d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -0,0 +1,121 @@
+//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "forceattrs"
+
+static cl::list<std::string> // Repeatable: one 'function:attribute' per use.
+    ForceAttributes("force-attribute", cl::Hidden,
+                    cl::desc("Add an attribute to a function. This should be a "
+                             "pair of 'function-name:attribute-name', for "
+                             "example -force-attribute=foo:noinline. This " // Example must spell the flag as registered above.
+                             "option can be specified multiple times."));
+
+static Attribute::AttrKind parseAttrKind(StringRef Kind) {
+  return StringSwitch<Attribute::AttrKind>(Kind) // Spelling -> kind; keys sorted.
+      .Case("alwaysinline", Attribute::AlwaysInline)
+      .Case("argmemonly", Attribute::ArgMemOnly)
+      .Case("builtin", Attribute::Builtin)
+      .Case("cold", Attribute::Cold)
+      .Case("convergent", Attribute::Convergent)
+      .Case("inlinehint", Attribute::InlineHint)
+      .Case("jumptable", Attribute::JumpTable)
+      .Case("minsize", Attribute::MinSize)
+      .Case("naked", Attribute::Naked)
+      .Case("nobuiltin", Attribute::NoBuiltin)
+      .Case("noduplicate", Attribute::NoDuplicate)
+      .Case("noimplicitfloat", Attribute::NoImplicitFloat)
+      .Case("noinline", Attribute::NoInline)
+      .Case("nonlazybind", Attribute::NonLazyBind)
+      .Case("norecurse", Attribute::NoRecurse)
+      .Case("noredzone", Attribute::NoRedZone)
+      .Case("noreturn", Attribute::NoReturn)
+      .Case("nounwind", Attribute::NoUnwind)
+      .Case("optnone", Attribute::OptimizeNone)
+      .Case("optsize", Attribute::OptimizeForSize)
+      .Case("readnone", Attribute::ReadNone)
+      .Case("readonly", Attribute::ReadOnly)
+      .Case("returns_twice", Attribute::ReturnsTwice)
+      .Case("safestack", Attribute::SafeStack)
+      .Case("sanitize_address", Attribute::SanitizeAddress)
+      .Case("sanitize_memory", Attribute::SanitizeMemory)
+      .Case("sanitize_thread", Attribute::SanitizeThread)
+      .Case("ssp", Attribute::StackProtect)
+      .Case("sspreq", Attribute::StackProtectReq)
+      .Case("sspstrong", Attribute::StackProtectStrong)
+      .Case("uwtable", Attribute::UWTable)
+      .Default(Attribute::None); // Any other spelling yields Attribute::None.
+}
+
+/// Apply every -force-attribute entry whose function name matches \p F.
+static void addForcedAttributes(Function &F) {
+  for (const std::string &Entry : ForceAttributes) {
+    // Entries have the form "function-name:attribute-name".
+    std::pair<StringRef, StringRef> NameAndAttr = StringRef(Entry).split(':');
+    if (NameAndAttr.first != F.getName())
+      continue;
+    Attribute::AttrKind Requested = parseAttrKind(NameAndAttr.second);
+    if (Requested == Attribute::None) {
+      DEBUG(dbgs() << "ForcedAttribute: " << NameAndAttr.second
+                   << " unknown or not handled!\n");
+      continue;
+    }
+    // Only touch the function when the attribute is not already present.
+    if (!F.hasFnAttribute(Requested))
+      F.addFnAttr(Requested);
+  }
+}
+
+PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) {
+  // No -force-attribute options were given: nothing changes, keep everything.
+  if (ForceAttributes.empty())
+    return PreservedAnalyses::all();
+  for (Function &Fn : M)
+    addForcedAttributes(Fn);
+  // Attributes may have been added; rather than tracking which analyses
+  // survive, report that none are preserved.
+  return PreservedAnalyses::none();
+}
+
+namespace {
+/// Legacy pass-manager wrapper that applies -force-attribute requests.
+struct ForceFunctionAttrsLegacyPass : public ModulePass {
+  static char ID; // Pass identification, replacement for typeid
+  ForceFunctionAttrsLegacyPass() : ModulePass(ID) {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeForceFunctionAttrsLegacyPassPass(Registry);
+  }
+
+  /// Returns false only when no attributes were requested; otherwise the
+  /// module is reported as modified without checking what actually changed.
+  bool runOnModule(Module &M) override {
+    if (ForceAttributes.empty())
+      return false;
+    for (Function &Fn : M)
+      addForcedAttributes(Fn);
+    return true;
+  }
+};
+}
+
+char ForceFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS(ForceFunctionAttrsLegacyPass, "forceattrs",
+ "Force set function attributes", false, false)
+
+Pass *llvm::createForceFunctionAttrsLegacyPass() { // Factory for the legacy pass.
+ return new ForceFunctionAttrsLegacyPass(); // Allocates a new pass object with `new`.
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index bb5e64a..6dcfb3f 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -23,14 +23,21 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
@@ -42,230 +49,191 @@ STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
-STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
+STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
namespace {
- struct FunctionAttrs : public CallGraphSCCPass {
- static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) {
- initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
- }
-
- // runOnSCC - Analyze the SCC, performing the transformation if possible.
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
- bool AddReadAttrs(const CallGraphSCC &SCC);
-
- // AddArgumentAttrs - Deduce nocapture attributes for the SCC.
- bool AddArgumentAttrs(const CallGraphSCC &SCC);
-
- // IsFunctionMallocLike - Does this function allocate new memory?
- bool IsFunctionMallocLike(Function *F,
- SmallPtrSet<Function*, 8> &) const;
-
- // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
- bool AddNoAliasAttrs(const CallGraphSCC &SCC);
-
- // Utility methods used by inferPrototypeAttributes to add attributes
- // and maintain annotation statistics.
-
- void setDoesNotAccessMemory(Function &F) {
- if (!F.doesNotAccessMemory()) {
- F.setDoesNotAccessMemory();
- ++NumAnnotated;
- }
- }
-
- void setOnlyReadsMemory(Function &F) {
- if (!F.onlyReadsMemory()) {
- F.setOnlyReadsMemory();
- ++NumAnnotated;
- }
- }
-
- void setDoesNotThrow(Function &F) {
- if (!F.doesNotThrow()) {
- F.setDoesNotThrow();
- ++NumAnnotated;
- }
- }
-
- void setDoesNotCapture(Function &F, unsigned n) {
- if (!F.doesNotCapture(n)) {
- F.setDoesNotCapture(n);
- ++NumAnnotated;
- }
- }
-
- void setOnlyReadsMemory(Function &F, unsigned n) {
- if (!F.onlyReadsMemory(n)) {
- F.setOnlyReadsMemory(n);
- ++NumAnnotated;
- }
- }
-
- void setDoesNotAlias(Function &F, unsigned n) {
- if (!F.doesNotAlias(n)) {
- F.setDoesNotAlias(n);
- ++NumAnnotated;
- }
- }
-
- // inferPrototypeAttributes - Analyze the name and prototype of the
- // given function and set any applicable attributes. Returns true
- // if any attributes were set and false otherwise.
- bool inferPrototypeAttributes(Function &F);
+typedef SmallSetVector<Function *, 8> SCCNodeSet;
+}
- // annotateLibraryCalls - Adds attributes to well-known standard library
- // call declarations.
- bool annotateLibraryCalls(const CallGraphSCC &SCC);
+namespace {
+struct FunctionAttrs : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ FunctionAttrs() : CallGraphSCCPass(ID) {
+ initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
+ bool runOnSCC(CallGraphSCC &SCC) override;
+ bool doInitialization(CallGraph &CG) override {
+ Revisit.clear();
+ return false;
+ }
+ bool doFinalization(CallGraph &CG) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
- private:
- AliasAnalysis *AA;
- TargetLibraryInfo *TLI;
- };
+private:
+ TargetLibraryInfo *TLI;
+ SmallVector<WeakVH,16> Revisit;
+};
}
char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+ "Deduce function attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false)
+ "Deduce function attributes", false, false)
Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+namespace {
+/// Classification of a function's memory behaviour, as needed to decide
+/// whether it can be marked 'readnone' or 'readonly'.
+enum MemoryAccessKind {
+  MAK_ReadNone, // 0: no memory access found.
+  MAK_ReadOnly, // 1: memory may be read, but no write was found.
+  MAK_MayWrite  // 2: memory may be written.
+};
+}
-/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
-bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
- SmallPtrSet<Function*, 8> SCCNodes;
-
- // Fill SCCNodes with the elements of the SCC. Used for quickly
- // looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
- SCCNodes.insert((*I)->getFunction());
+static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
+ const SCCNodeSet &SCCNodes) {
+ FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
+ if (MRB == FMRB_DoesNotAccessMemory)
+ // Already perfect!
+ return MAK_ReadNone;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F.isDeclaration() || F.mayBeOverridden()) {
+ if (AliasAnalysis::onlyReadsMemory(MRB))
+ return MAK_ReadOnly;
+
+ // Conservatively assume it writes to memory.
+ return MAK_MayWrite;
+ }
- // Check if any of the functions in the SCC read or write memory. If they
- // write memory then they can't be marked readnone or readonly.
+ // Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
- // External node or node we don't want to optimize - assume it may write
- // memory and give up.
- return false;
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+
+ // Some instructions can be ignored even if they read or write memory.
+ // Detect these now, skipping to the next instruction if one is found.
+ CallSite CS(cast<Value>(I));
+ if (CS) {
+ // Ignore calls to functions in the same SCC.
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ continue;
+ FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);
- AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
- if (MRB == AliasAnalysis::DoesNotAccessMemory)
- // Already perfect!
- continue;
+ // If the call doesn't access memory, we're done.
+ if (!(MRB & MRI_ModRef))
+ continue;
- // Definitions with weak linkage may be overridden at linktime with
- // something that writes memory, so treat them like declarations.
- if (F->isDeclaration() || F->mayBeOverridden()) {
- if (!AliasAnalysis::onlyReadsMemory(MRB))
- // May write memory. Just give up.
- return false;
+ if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ // The call could access any memory. If that includes writes, give up.
+ if (MRB & MRI_Mod)
+ return MAK_MayWrite;
+ // If it reads, note it.
+ if (MRB & MRI_Ref)
+ ReadsMemory = true;
+ continue;
+ }
- ReadsMemory = true;
- continue;
- }
+ // Check whether all pointer arguments point to local memory, and
+ // ignore calls that only access local memory.
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI) {
+ Value *Arg = *CI;
+ if (!Arg->getType()->isPtrOrPtrVectorTy())
+ continue;
- // Scan the function body for instructions that may read or write memory.
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
- Instruction *I = &*II;
+ AAMDNodes AAInfo;
+ I->getAAMetadata(AAInfo);
+ MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);
- // Some instructions can be ignored even if they read or write memory.
- // Detect these now, skipping to the next instruction if one is found.
- CallSite CS(cast<Value>(I));
- if (CS) {
- // Ignore calls to functions in the same SCC.
- if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ // Skip accesses to local or constant memory as they don't impact the
+ // externally visible mod/ref behavior.
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
- AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
- // If the call doesn't access arbitrary memory, we may be able to
- // figure out something.
- if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
- // If the call does access argument pointees, check each argument.
- if (AliasAnalysis::doesAccessArgPointees(MRB))
- // Check whether all pointer arguments point to local memory, and
- // ignore calls that only access local memory.
- for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
- CI != CE; ++CI) {
- Value *Arg = *CI;
- if (Arg->getType()->isPointerTy()) {
- AAMDNodes AAInfo;
- I->getAAMetadata(AAInfo);
-
- MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);
- if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
- if (MRB & AliasAnalysis::Mod)
- // Writes non-local memory. Give up.
- return false;
- if (MRB & AliasAnalysis::Ref)
- // Ok, it reads non-local memory.
- ReadsMemory = true;
- }
- }
- }
- continue;
- }
- // The call could access any memory. If that includes writes, give up.
- if (MRB & AliasAnalysis::Mod)
- return false;
- // If it reads, note it.
- if (MRB & AliasAnalysis::Ref)
+
+ if (MRB & MRI_Mod)
+ // Writes non-local memory. Give up.
+ return MAK_MayWrite;
+ if (MRB & MRI_Ref)
+ // Ok, it reads non-local memory.
ReadsMemory = true;
- continue;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- // Ignore non-volatile loads from local memory. (Atomic is okay here.)
- if (!LI->isVolatile()) {
- MemoryLocation Loc = MemoryLocation::get(LI);
- if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
- continue;
- }
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Ignore non-volatile stores to local memory. (Atomic is okay here.)
- if (!SI->isVolatile()) {
- MemoryLocation Loc = MemoryLocation::get(SI);
- if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
- continue;
- }
- } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
- // Ignore vaargs on local memory.
- MemoryLocation Loc = MemoryLocation::get(VI);
- if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ }
+ continue;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads from local memory. (Atomic is okay here.)
+ if (!LI->isVolatile()) {
+ MemoryLocation Loc = MemoryLocation::get(LI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Ignore non-volatile stores to local memory. (Atomic is okay here.)
+ if (!SI->isVolatile()) {
+ MemoryLocation Loc = MemoryLocation::get(SI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ // Ignore vaargs on local memory.
+ MemoryLocation Loc = MemoryLocation::get(VI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
- // Any remaining instructions need to be taken seriously! Check if they
- // read or write memory.
- if (I->mayWriteToMemory())
- // Writes memory. Just give up.
- return false;
+ // Any remaining instructions need to be taken seriously! Check if they
+ // read or write memory.
+ if (I->mayWriteToMemory())
+ // Writes memory. Just give up.
+ return MAK_MayWrite;
+
+ // If this instruction may read memory, remember that.
+ ReadsMemory |= I->mayReadFromMemory();
+ }
+
+ return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
+}
- // If this instruction may read memory, remember that.
- ReadsMemory |= I->mayReadFromMemory();
+/// Deduce readonly/readnone attributes for the SCC.
+template <typename AARGetterT>
+static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
+ // Check if any of the functions in the SCC read or write memory. If they
+ // write memory then they can't be marked readnone or readonly.
+ bool ReadsMemory = false;
+ for (Function *F : SCCNodes) {
+ // Call the callable parameter to look up AA results for this function.
+ AAResults &AAR = AARGetter(*F);
+
+ switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) {
+ case MAK_MayWrite:
+ return false;
+ case MAK_ReadOnly:
+ ReadsMemory = true;
+ break;
+ case MAK_ReadNone:
+ // Nothing to do!
+ break;
}
}
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
bool MadeChange = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
+ for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
// Already perfect!
continue;
@@ -278,11 +246,10 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// Clear out any existing attributes.
AttrBuilder B;
- B.addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::ReadNone);
- F->removeAttributes(AttributeSet::FunctionIndex,
- AttributeSet::get(F->getContext(),
- AttributeSet::FunctionIndex, B));
+ B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
+ F->removeAttributes(
+ AttributeSet::FunctionIndex,
+ AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B));
// Add in the new attribute.
F->addAttribute(AttributeSet::FunctionIndex,
@@ -298,124 +265,140 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
}
namespace {
- // For a given pointer Argument, this retains a list of Arguments of functions
- // in the same SCC that the pointer data flows into. We use this to build an
- // SCC of the arguments.
- struct ArgumentGraphNode {
- Argument *Definition;
- SmallVector<ArgumentGraphNode*, 4> Uses;
- };
-
- class ArgumentGraph {
- // We store pointers to ArgumentGraphNode objects, so it's important that
- // that they not move around upon insert.
- typedef std::map<Argument*, ArgumentGraphNode> ArgumentMapTy;
+/// For a given pointer Argument, this retains a list of Arguments of functions
+/// in the same SCC that the pointer data flows into. We use this to build an
+/// SCC of the arguments.
+struct ArgumentGraphNode {
+ Argument *Definition; // Argument this node stands for; null for the synthetic root.
+ SmallVector<ArgumentGraphNode *, 4> Uses; // Nodes the pointer data flows into.
+};
+
+class ArgumentGraph { // Graph over pointer arguments, entered via a synthetic root.
+ // We hand out pointers to ArgumentGraphNode objects, so it's important that
+ // they not move around upon insert.
+ typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy;
+
+ ArgumentMapTy ArgumentMap;
+
+ // There is no root node for the argument graph, in fact:
+ // void f(int *x, int *y) { if (...) f(x, y); }
+ // is an example where the graph is disconnected. The SCCIterator requires a
+ // single entry point, so we maintain a fake ("synthetic") root node that
+ // uses every node. Because the graph is directed and nothing points into
+ // the root, it will not participate in any SCCs (except for its own).
+ ArgumentGraphNode SyntheticRoot;
+
+public:
+ ArgumentGraph() { SyntheticRoot.Definition = nullptr; } // Root stands for no argument.
+
+ typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator;
+
+ iterator begin() { return SyntheticRoot.Uses.begin(); } // Iterates the root's use list.
+ iterator end() { return SyntheticRoot.Uses.end(); }
+ ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
+
+ ArgumentGraphNode *operator[](Argument *A) { // Node for A, created on first lookup.
+ ArgumentGraphNode &Node = ArgumentMap[A]; // std::map inserts a default node if absent.
+ Node.Definition = A;
+ SyntheticRoot.Uses.push_back(&Node); // Appended on every call, including repeated lookups.
+ return &Node;
+ }
+};
- ArgumentMapTy ArgumentMap;
+/// This tracker checks whether callees are in the SCC, and if so it does not
+/// consider that a capture, instead adding it to the "Uses" list and
+/// continuing with the analysis.
+struct ArgumentUsesTracker : public CaptureTracker {
+ ArgumentUsesTracker(const SCCNodeSet &SCCNodes)
+ : Captured(false), SCCNodes(SCCNodes) {}
- // There is no root node for the argument graph, in fact:
- // void f(int *x, int *y) { if (...) f(x, y); }
- // is an example where the graph is disconnected. The SCCIterator requires a
- // single entry point, so we maintain a fake ("synthetic") root node that
- // uses every node. Because the graph is directed and nothing points into
- // the root, it will not participate in any SCCs (except for its own).
- ArgumentGraphNode SyntheticRoot;
+ void tooManyUses() override { Captured = true; }
- public:
- ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
+ bool captured(const Use *U) override {
+ CallSite CS(U->getUser());
+ if (!CS.getInstruction()) {
+ Captured = true;
+ return true;
+ }
- typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator;
+ Function *F = CS.getCalledFunction();
+ if (!F || F->isDeclaration() || F->mayBeOverridden() ||
+ !SCCNodes.count(F)) {
+ Captured = true;
+ return true;
+ }
- iterator begin() { return SyntheticRoot.Uses.begin(); }
- iterator end() { return SyntheticRoot.Uses.end(); }
- ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
+ // Note: the callee and the two successor blocks *follow* the argument
+ // operands. This means there is no need to adjust UseIndex to account for
+ // these.
- ArgumentGraphNode *operator[](Argument *A) {
- ArgumentGraphNode &Node = ArgumentMap[A];
- Node.Definition = A;
- SyntheticRoot.Uses.push_back(&Node);
- return &Node;
- }
- };
+ unsigned UseIndex =
+ std::distance(const_cast<const Use *>(CS.arg_begin()), U);
- // This tracker checks whether callees are in the SCC, and if so it does not
- // consider that a capture, instead adding it to the "Uses" list and
- // continuing with the analysis.
- struct ArgumentUsesTracker : public CaptureTracker {
- ArgumentUsesTracker(const SmallPtrSet<Function*, 8> &SCCNodes)
- : Captured(false), SCCNodes(SCCNodes) {}
+ assert(UseIndex < CS.data_operands_size() &&
+ "Indirect function calls should have been filtered above!");
- void tooManyUses() override { Captured = true; }
+ if (UseIndex >= CS.getNumArgOperands()) {
+ // Data operand, but not a argument operand -- must be a bundle operand
+ assert(CS.hasOperandBundles() && "Must be!");
- bool captured(const Use *U) override {
- CallSite CS(U->getUser());
- if (!CS.getInstruction()) { Captured = true; return true; }
+ // CaptureTracking told us that we're being captured by an operand bundle
+ // use. In this case it does not matter if the callee is within our SCC
+ // or not -- we've been captured in some unknown way, and we have to be
+ // conservative.
+ Captured = true;
+ return true;
+ }
- Function *F = CS.getCalledFunction();
- if (!F || !SCCNodes.count(F)) { Captured = true; return true; }
-
- bool Found = false;
- Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- for (CallSite::arg_iterator PI = CS.arg_begin(), PE = CS.arg_end();
- PI != PE; ++PI, ++AI) {
- if (AI == AE) {
- assert(F->isVarArg() && "More params than args in non-varargs call");
- Captured = true;
- return true;
- }
- if (PI == U) {
- Uses.push_back(AI);
- Found = true;
- break;
- }
- }
- assert(Found && "Capturing call-site captured nothing?");
- (void)Found;
- return false;
+ if (UseIndex >= F->arg_size()) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ Captured = true;
+ return true;
}
- bool Captured; // True only if certainly captured (used outside our SCC).
- SmallVector<Argument*, 4> Uses; // Uses within our SCC.
+ Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
+ return false;
+ }
- const SmallPtrSet<Function*, 8> &SCCNodes;
- };
+ bool Captured; // True only if certainly captured (used outside our SCC).
+ SmallVector<Argument *, 4> Uses; // Uses within our SCC.
+
+ const SCCNodeSet &SCCNodes;
+};
}
namespace llvm {
- template<> struct GraphTraits<ArgumentGraphNode*> {
- typedef ArgumentGraphNode NodeType;
- typedef SmallVectorImpl<ArgumentGraphNode*>::iterator ChildIteratorType;
+template <> struct GraphTraits<ArgumentGraphNode *> {
+ typedef ArgumentGraphNode NodeType;
+ typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
- static inline NodeType *getEntryNode(NodeType *A) { return A; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->Uses.begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->Uses.end();
- }
- };
- template<> struct GraphTraits<ArgumentGraph*>
- : public GraphTraits<ArgumentGraphNode*> {
- static NodeType *getEntryNode(ArgumentGraph *AG) {
- return AG->getEntryNode();
- }
- static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
- return AG->begin();
- }
- static ChildIteratorType nodes_end(ArgumentGraph *AG) {
- return AG->end();
- }
- };
+ static inline NodeType *getEntryNode(NodeType *A) { return A; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ return N->Uses.begin();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) {
+ return N->Uses.end();
+ }
+};
+template <>
+struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
+ static NodeType *getEntryNode(ArgumentGraph *AG) {
+ return AG->getEntryNode();
+ }
+ static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
+ return AG->begin();
+ }
+ static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
+};
}
-// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
+/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
determinePointerReadAttrs(Argument *A,
- const SmallPtrSet<Argument*, 8> &SCCNodes) {
-
- SmallVector<Use*, 32> Worklist;
- SmallSet<Use*, 32> Visited;
- int Count = 0;
+ const SmallPtrSet<Argument *, 8> &SCCNodes) {
+
+ SmallVector<Use *, 32> Worklist;
+ SmallSet<Use *, 32> Visited;
// inalloca arguments are always clobbered by the call.
if (A->hasInAllocaAttr())
@@ -425,9 +408,6 @@ determinePointerReadAttrs(Argument *A,
// We don't need to track IsWritten. If A is written to, return immediately.
for (Use &U : A->uses()) {
- if (Count++ >= 20)
- return Attribute::None;
-
Visited.insert(&U);
Worklist.push_back(&U);
}
@@ -435,7 +415,6 @@ determinePointerReadAttrs(Argument *A,
while (!Worklist.empty()) {
Use *U = Worklist.pop_back_val();
Instruction *I = cast<Instruction>(U->getUser());
- Value *V = U->get();
switch (I->getOpcode()) {
case Instruction::BitCast:
@@ -479,24 +458,44 @@ determinePointerReadAttrs(Argument *A,
return Attribute::None;
}
- Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
- for (CallSite::arg_iterator A = B; A != E; ++A, ++AI) {
- if (A->get() == V) {
- if (AI == AE) {
- assert(F->isVarArg() &&
- "More params than args in non-varargs call.");
- return Attribute::None;
- }
- Captures &= !CS.doesNotCapture(A - B);
- if (SCCNodes.count(AI))
- continue;
- if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(A - B))
- return Attribute::None;
- if (!CS.doesNotAccessMemory(A - B))
- IsRead = true;
- }
+ // Note: the callee and the two successor blocks *follow* the argument
+ // operands. This means there is no need to adjust UseIndex to account
+ // for these.
+
+ unsigned UseIndex = std::distance(CS.arg_begin(), U);
+
+ // U cannot be the callee operand use: since we're exploring the
+ // transitive uses of an Argument, having such a use be a callee would
+ // imply the CallSite is an indirect call or invoke; and we'd take the
+ // early exit above.
+ assert(UseIndex < CS.data_operands_size() &&
+ "Data operand use expected!");
+
+ bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands();
+
+ if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ return Attribute::None;
}
+
+ Captures &= !CS.doesNotCapture(UseIndex);
+
+ // Since the optimizer (by design) cannot see the data flow corresponding
+ // to a operand bundle use, these cannot participate in the optimistic SCC
+ // analysis. Instead, we model the operand bundle uses as arguments in
+ // call to a function external to the SCC.
+ if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) ||
+ IsOperandBundleUse) {
+
+ // The accessors used on CallSite here do the right thing for calls and
+ // invokes with operand bundles.
+
+ if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex))
+ return Attribute::None;
+ if (!CS.doesNotAccessMemory(UseIndex))
+ IsRead = true;
+ }
+
AddUsersToWorklistIfCapturing();
break;
}
@@ -517,21 +516,10 @@ determinePointerReadAttrs(Argument *A,
return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
}
-/// AddArgumentAttrs - Deduce nocapture attributes for the SCC.
-bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
+/// Deduce nocapture attributes for the SCC.
+static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
- SmallPtrSet<Function*, 8> SCCNodes;
-
- // Fill SCCNodes with the elements of the SCC. Used for quickly
- // looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
- if (F && !F->isDeclaration() && !F->mayBeOverridden() &&
- !F->hasFnAttribute(Attribute::OptimizeNone))
- SCCNodes.insert(F);
- }
-
ArgumentGraph AG;
AttrBuilder B;
@@ -539,14 +527,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// Check each function in turn, determining which pointer arguments are not
// captured.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
- // External node or function we're trying not to optimize - only a problem
- // for arguments that we pass to it.
- continue;
-
+ for (Function *F : SCCNodes) {
// Definitions with weak linkage may be overridden at linktime with
// something that captures pointers, so treat them like declarations.
if (F->isDeclaration() || F->mayBeOverridden())
@@ -556,8 +537,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// a value can't capture arguments. Don't analyze them.
if (F->onlyReadsMemory() && F->doesNotThrow() &&
F->getReturnType()->isVoidTy()) {
- for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
- A != E; ++A) {
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
+ ++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
@@ -567,26 +548,30 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
continue;
}
- for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
- A != E; ++A) {
- if (!A->getType()->isPointerTy()) continue;
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
+ ++A) {
+ if (!A->getType()->isPointerTy())
+ continue;
bool HasNonLocalUses = false;
if (!A->hasNoCaptureAttr()) {
ArgumentUsesTracker Tracker(SCCNodes);
- PointerMayBeCaptured(A, &Tracker);
+ PointerMayBeCaptured(&*A, &Tracker);
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B));
+ A->addAttr(
+ AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
} else {
// If it's not trivially captured and not trivially not captured,
// then it must be calling into another function in our SCC. Save
// its particulars for Argument-SCC analysis later.
- ArgumentGraphNode *Node = AG[A];
- for (SmallVectorImpl<Argument*>::iterator UI = Tracker.Uses.begin(),
- UE = Tracker.Uses.end(); UI != UE; ++UI) {
+ ArgumentGraphNode *Node = AG[&*A];
+ for (SmallVectorImpl<Argument *>::iterator
+ UI = Tracker.Uses.begin(),
+ UE = Tracker.Uses.end();
+ UI != UE; ++UI) {
Node->Uses.push_back(AG[*UI]);
if (*UI != A)
HasNonLocalUses = true;
@@ -600,9 +585,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// Note that we don't allow any calls at all here, or else our result
// will be dependent on the iteration order through the functions in the
// SCC.
- SmallPtrSet<Argument*, 8> Self;
- Self.insert(A);
- Attribute::AttrKind R = determinePointerReadAttrs(A, Self);
+ SmallPtrSet<Argument *, 8> Self;
+ Self.insert(&*A);
+ Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
if (R != Attribute::None) {
AttrBuilder B;
B.addAttribute(R);
@@ -621,10 +606,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// made. If the definition doesn't have a 'nocapture' attribute by now, it
// captures.
- for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
+ for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
if (ArgumentSCC.size() == 1) {
- if (!ArgumentSCC[0]->Definition) continue; // synthetic root node
+ if (!ArgumentSCC[0]->Definition)
+ continue; // synthetic root node
// eg. "void f(int* x) { if (...) f(x); }"
if (ArgumentSCC[0]->Uses.size() == 1 &&
@@ -646,9 +632,10 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
SCCCaptured = true;
}
}
- if (SCCCaptured) continue;
+ if (SCCCaptured)
+ continue;
- SmallPtrSet<Argument*, 8> ArgumentSCCNodes;
+ SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
// Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
// quickly looking up whether a given Argument is in this ArgumentSCC.
for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) {
@@ -658,8 +645,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
I != E && !SCCCaptured; ++I) {
ArgumentGraphNode *N = *I;
- for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(),
- UE = N->Uses.end(); UI != UE; ++UI) {
+ for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(),
+ UE = N->Uses.end();
+ UI != UE; ++UI) {
Argument *A = (*UI)->Definition;
if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
continue;
@@ -667,7 +655,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
break;
}
}
- if (SCCCaptured) continue;
+ if (SCCCaptured)
+ continue;
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
@@ -704,8 +693,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
if (ReadAttr != Attribute::None) {
AttrBuilder B, R;
B.addAttribute(ReadAttr);
- R.addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::ReadNone);
+ R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
// Clear out existing readonly/readnone attributes
@@ -720,10 +708,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
return Changed;
}
-/// IsFunctionMallocLike - A function is malloc-like if it returns either null
-/// or a pointer that doesn't alias any other pointer visible to the caller.
-bool FunctionAttrs::IsFunctionMallocLike(Function *F,
- SmallPtrSet<Function*, 8> &SCCNodes) const {
+/// Tests whether a function is "malloc-like".
+///
+/// A function is "malloc-like" if it returns either null or a pointer that
+/// doesn't alias any other pointer visible to the caller.
+static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
SmallSetVector<Value *, 8> FlowsToReturn;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
@@ -744,39 +733,38 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
switch (RVI->getOpcode()) {
- // Extend the analysis by looking upwards.
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- case Instruction::AddrSpaceCast:
- FlowsToReturn.insert(RVI->getOperand(0));
- continue;
- case Instruction::Select: {
- SelectInst *SI = cast<SelectInst>(RVI);
- FlowsToReturn.insert(SI->getTrueValue());
- FlowsToReturn.insert(SI->getFalseValue());
- continue;
- }
- case Instruction::PHI: {
- PHINode *PN = cast<PHINode>(RVI);
- for (Value *IncValue : PN->incoming_values())
- FlowsToReturn.insert(IncValue);
- continue;
- }
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (Value *IncValue : PN->incoming_values())
+ FlowsToReturn.insert(IncValue);
+ continue;
+ }
- // Check whether the pointer came from an allocation.
- case Instruction::Alloca:
+ // Check whether the pointer came from an allocation.
+ case Instruction::Alloca:
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ if (CS.paramHasAttr(0, Attribute::NoAlias))
+ break;
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
break;
- case Instruction::Call:
- case Instruction::Invoke: {
- CallSite CS(RVI);
- if (CS.paramHasAttr(0, Attribute::NoAlias))
- break;
- if (CS.getCalledFunction() &&
- SCCNodes.count(CS.getCalledFunction()))
- break;
- } // fall-through
- default:
- return false; // Did not come from an allocation.
+ } // fall-through
+ default:
+ return false; // Did not come from an allocation.
}
if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
@@ -786,24 +774,11 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
return true;
}
-/// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
-bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
- SmallPtrSet<Function*, 8> SCCNodes;
-
- // Fill SCCNodes with the elements of the SCC. Used for quickly
- // looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
- SCCNodes.insert((*I)->getFunction());
-
+/// Deduce noalias attributes for the SCC.
+static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
// Check each function in turn, determining which functions return noalias
// pointers.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
- // External node or node we don't want to optimize - skip it;
- return false;
-
+ for (Function *F : SCCNodes) {
// Already noalias.
if (F->doesNotAlias(0))
continue;
@@ -813,18 +788,17 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
if (F->isDeclaration() || F->mayBeOverridden())
return false;
- // We annotate noalias return values, which are only applicable to
+ // We annotate noalias return values, which are only applicable to
// pointer types.
if (!F->getReturnType()->isPointerTy())
continue;
- if (!IsFunctionMallocLike(F, SCCNodes))
+ if (!isFunctionMallocLike(F, SCCNodes))
return false;
}
bool MadeChange = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
+ for (Function *F : SCCNodes) {
if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
continue;
@@ -836,880 +810,249 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
return MadeChange;
}
-/// inferPrototypeAttributes - Analyze the name and prototype of the
-/// given function and set any applicable attributes. Returns true
-/// if any attributes were set and false otherwise.
-bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
- if (F.hasFnAttribute(Attribute::OptimizeNone))
- return false;
+/// Tests whether this function is known to not return null.
+///
+/// Requires that the function returns a pointer.
+///
+/// Returns true if it believes the function will not return a null, and sets
+/// \p Speculative based on whether the returned conclusion is a speculative
+/// conclusion due to SCC calls.
+static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
+ const TargetLibraryInfo &TLI, bool &Speculative) {
+ assert(F->getReturnType()->isPointerTy() &&
+ "nonnull only meaningful on pointer types");
+ Speculative = false;
- FunctionType *FTy = F.getFunctionType();
- LibFunc::Func TheLibFunc;
- if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
- return false;
+ SmallSetVector<Value *, 8> FlowsToReturn;
+ for (BasicBlock &BB : *F)
+ if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
+ FlowsToReturn.insert(Ret->getReturnValue());
- switch (TheLibFunc) {
- case LibFunc::strlen:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::strchr:
- case LibFunc::strrchr:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isIntegerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- break;
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::strcpy:
- case LibFunc::stpcpy:
- case LibFunc::strcat:
- case LibFunc::strncat:
- case LibFunc::strncpy:
- case LibFunc::stpncpy:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::strxfrm:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::strcmp: //0,1
- case LibFunc::strspn: // 0,1
- case LibFunc::strncmp: // 0,1
- case LibFunc::strcspn: //0,1
- case LibFunc::strcoll: //0,1
- case LibFunc::strcasecmp: // 0,1
- case LibFunc::strncasecmp: //
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::strstr:
- case LibFunc::strpbrk:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::strtok:
- case LibFunc::strtok_r:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::scanf:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::setbuf:
- case LibFunc::setvbuf:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::strdup:
- case LibFunc::strndup:
- if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::stat:
- case LibFunc::statvfs:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::sscanf:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::sprintf:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::snprintf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- setOnlyReadsMemory(F, 3);
- break;
- case LibFunc::setitimer:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setDoesNotCapture(F, 3);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::system:
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- // May throw; "system" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::malloc:
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::memcmp:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::memchr:
- case LibFunc::memrchr:
- if (FTy->getNumParams() != 3)
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- break;
- case LibFunc::modf:
- case LibFunc::modff:
- case LibFunc::modfl:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::memcpy:
- case LibFunc::memccpy:
- case LibFunc::memmove:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::memalign:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::mkdir:
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::mktime:
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::realloc:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::read:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "read" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::rewind:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::rmdir:
- case LibFunc::remove:
- case LibFunc::realpath:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::rename:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::readlink:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::write:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "write" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::bcopy:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::bcmp:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::bzero:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::calloc:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::chmod:
- case LibFunc::chown:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::ctermid:
- case LibFunc::clearerr:
- case LibFunc::closedir:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::atoi:
- case LibFunc::atol:
- case LibFunc::atof:
- case LibFunc::atoll:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::access:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::fopen:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fdopen:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::feof:
- case LibFunc::free:
- case LibFunc::fseek:
- case LibFunc::ftell:
- case LibFunc::fgetc:
- case LibFunc::fseeko:
- case LibFunc::ftello:
- case LibFunc::fileno:
- case LibFunc::fflush:
- case LibFunc::fclose:
- case LibFunc::fsetpos:
- case LibFunc::flockfile:
- case LibFunc::funlockfile:
- case LibFunc::ftrylockfile:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::ferror:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F);
- break;
- case LibFunc::fputc:
- case LibFunc::fstat:
- case LibFunc::frexp:
- case LibFunc::frexpf:
- case LibFunc::frexpl:
- case LibFunc::fstatvfs:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::fgets:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 3);
- break;
- case LibFunc::fread:
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- break;
- case LibFunc::fwrite:
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- break;
- case LibFunc::fputs:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::fscanf:
- case LibFunc::fprintf:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fgetpos:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::getc:
- case LibFunc::getlogin_r:
- case LibFunc::getc_unlocked:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::getenv:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::gets:
- case LibFunc::getchar:
- setDoesNotThrow(F);
- break;
- case LibFunc::getitimer:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::getpwnam:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::ungetc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::uname:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::unlink:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::unsetenv:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::utime:
- case LibFunc::utimes:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::putc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::puts:
- case LibFunc::printf:
- case LibFunc::perror:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::pread:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "pread" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::pwrite:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "pwrite" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::putchar:
- setDoesNotThrow(F);
- break;
- case LibFunc::popen:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::pclose:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::vscanf:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::vsscanf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::vfscanf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::valloc:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::vprintf:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::vfprintf:
- case LibFunc::vsprintf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::vsnprintf:
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- setOnlyReadsMemory(F, 3);
- break;
- case LibFunc::open:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::opendir:
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::tmpfile:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::times:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::htonl:
- case LibFunc::htons:
- case LibFunc::ntohl:
- case LibFunc::ntohs:
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- break;
- case LibFunc::lstat:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::lchown:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::qsort:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
- return false;
- // May throw; places call through function pointer.
- setDoesNotCapture(F, 4);
- break;
- case LibFunc::dunder_strdup:
- case LibFunc::dunder_strndup:
- if (FTy->getNumParams() < 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::dunder_strtok_r:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::under_IO_getc:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::under_IO_putc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::dunder_isoc99_scanf:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::stat64:
- case LibFunc::lstat64:
- case LibFunc::statvfs64:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::dunder_isoc99_sscanf:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fopen64:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fseeko64:
- case LibFunc::ftello64:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::tmpfile64:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::fstat64:
- case LibFunc::fstatvfs64:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::open64:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+ Value *RetVal = FlowsToReturn[i];
+
+ // If this value is locally known to be non-null, we're good
+ if (isKnownNonNull(RetVal, &TLI))
+ continue;
+
+ // Otherwise, we need to look upwards since we can't make any local
+ // conclusions.
+ Instruction *RVI = dyn_cast<Instruction>(RetVal);
+ if (!RVI)
return false;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::gettimeofday:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
+ switch (RVI->getOpcode()) {
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ FlowsToReturn.insert(PN->getIncomingValue(i));
+ continue;
+ }
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ Function *Callee = CS.getCalledFunction();
+ // A call to a node within the SCC is assumed to return null until
+ // proven otherwise
+ if (Callee && SCCNodes.count(Callee)) {
+ Speculative = true;
+ continue;
+ }
return false;
- // Currently some platforms have the restrict keyword on the arguments to
- // gettimeofday. To be conservative, do not add noalias to gettimeofday's
- // arguments.
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- default:
- // Didn't mark any attributes.
- return false;
+ }
+ default:
+ return false; // Unknown source, may be null
+ };
+ llvm_unreachable("should have either continued or returned");
}
return true;
}
-/// annotateLibraryCalls - Adds attributes to well-known standard library
-/// call declarations.
-bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+/// Deduce nonnull attributes for the SCC.
+///
+/// Functions whose return value is provably non-null without relying on
+/// other SCC members are marked eagerly. If every pointer-returning function
+/// in the SCC passes the (possibly speculative) check, the remaining ones are
+/// marked together at the end.
+///
+/// \returns true if any function gained the nonnull return attribute.
+static bool addNonNullAttrs(const SCCNodeSet &SCCNodes,
+ const TargetLibraryInfo &TLI) {
+ // Speculate that all functions in the SCC return only nonnull
+ // pointers. We may refute this as we analyze functions.
+ bool SCCReturnsNonNull = true;
+
bool MadeChange = false;
- // Check each function in turn annotating well-known library function
- // declarations with attributes.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
+ // Check each function in turn, determining which functions return nonnull
+ // pointers.
+ for (Function *F : SCCNodes) {
+ // Already nonnull.
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::NonNull))
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime, so
+ // treat them like declarations. Functions visited earlier may already
+ // have been marked eagerly, so report that instead of a blanket "false".
+ if (F->isDeclaration() || F->mayBeOverridden())
+ return MadeChange;
+
+ // We annotate nonnull return values, which are only applicable to
+ // pointer types.
+ if (!F->getReturnType()->isPointerTy())
+ continue;
- if (F && F->isDeclaration())
- MadeChange |= inferPrototypeAttributes(*F);
+ bool Speculative = false;
+ if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) {
+ if (!Speculative) {
+ // Mark the function eagerly since we may discover a function
+ // which prevents us from speculating about the entire SCC
+ DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n");
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ ++NumNonNullReturn;
+ MadeChange = true;
+ }
+ continue;
+ }
+ // At least one function returns something which could be null, can't
+ // speculate any more.
+ SCCReturnsNonNull = false;
+ }
+
+ // The speculation held for the whole SCC: mark every pointer-returning
+ // function that was not already marked.
+ if (SCCReturnsNonNull) {
+ for (Function *F : SCCNodes) {
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::NonNull) ||
+ !F->getReturnType()->isPointerTy())
+ continue;
+
+ DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ ++NumNonNullReturn;
+ MadeChange = true;
+ }
}
return MadeChange;
}
+/// Mark \p F as norecurse unless it already carries the attribute.
+///
+/// \returns true if the attribute was newly added (NumNoRecurse is bumped in
+/// that case), false if \p F was already norecurse.
+static bool setDoesNotRecurse(Function &F) {
+  const bool NewlyMarked = !F.doesNotRecurse();
+  if (NewlyMarked) {
+    F.setDoesNotRecurse();
+    ++NumNoRecurse;
+  }
+  return NewlyMarked;
+}
+
+/// Try to mark the single function of a singular SCC as norecurse.
+///
+/// \param SCC     the call graph SCC being visited.
+/// \param Revisit receives the function when norecurse could not be proven
+///                here, so that it can be re-examined later.
+/// \returns true if the function was newly marked norecurse.
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
+ SmallVectorImpl<WeakVH> &Revisit) {
+ // Try and identify functions that do not recurse.
+
+ // If the SCC contains multiple nodes we know for sure there is recursion.
+ if (!SCC.isSingular())
+ return false;
+
+ const CallGraphNode *CGN = *SCC.begin();
+ Function *F = CGN->getFunction();
+ if (!F || F->isDeclaration() || F->doesNotRecurse())
+ return false;
+
+ // If all of the calls in F are identifiable and are to norecurse functions, F
+ // is norecurse. This check also detects self-recursion as F is not currently
+ // marked norecurse, so any call from F to F will not be marked norecurse.
+ if (std::all_of(CGN->begin(), CGN->end(),
+ [](const CallGraphNode::CallRecord &CR) {
+ Function *F = CR.second->getFunction();
+ return F && F->doesNotRecurse();
+ }))
+ // Every callee is known and already norecurse, so F can be marked too.
+ return setDoesNotRecurse(*F);
+
+ // At least one callee is unknown or not (yet) marked norecurse, so we
+ // haven't been able to prove that F doesn't recurse. Add it to the Revisit
+ // list to try again top-down later.
+ Revisit.push_back(F);
+ return false;
+}
+
+/// Top-down norecurse deduction: an internal function whose every user is an
+/// instruction located in a norecurse function is itself norecurse.
+///
+/// \returns true if \p F was newly marked norecurse.
+static bool addNoRecurseAttrsTopDownOnly(Function *F) {
+  // Nothing to do if already marked; the rule only applies to internal
+  // functions, since only then are all users visible.
+  if (F->doesNotRecurse() || !F->hasInternalLinkage())
+    return false;
+
+  for (auto *Usr : F->users()) {
+    // A non-instruction user defeats the analysis.
+    auto *Inst = dyn_cast<Instruction>(Usr);
+    if (!Inst)
+      return false;
+    // The function containing the user must itself be norecurse.
+    if (!Inst->getParent()->getParent()->doesNotRecurse())
+      return false;
+  }
+  return setDoesNotRecurse(*F);
+}
+
+/// Run the attribute deductions over one call graph SCC and return true if
+/// any of them changed a function.
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
- AA = &getAnalysis<AliasAnalysis>();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ bool Changed = false;
- bool Changed = annotateLibraryCalls(SCC);
- Changed |= AddReadAttrs(SCC);
- Changed |= AddArgumentAttrs(SCC);
- Changed |= AddNoAliasAttrs(SCC);
+ // We compute dedicated AA results for each function in the SCC as needed. We
+ // use a lambda referencing external objects so that they live long enough to
+ // be queried, but we re-use them each time. Note that each call re-emplaces
+ // BAR and AAR, so a previously returned reference refers to the new results.
+ Optional<BasicAAResult> BAR;
+ Optional<AAResults> AAR;
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+ AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+ return *AAR;
+ };
+
+ // Fill SCCNodes with the functions of the SCC. Used for quickly looking up
+ // whether a given function is in this SCC. Also track whether there are
+ // any external or opt-none nodes that will prevent us from optimizing any
+ // part of the SCC.
+ SCCNodeSet SCCNodes;
+ bool ExternalNode = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) {
+ // External node or function we're trying not to optimize - we avoid both
+ // transforming them and leveraging information they provide.
+ ExternalNode = true;
+ continue;
+ }
+
+ SCCNodes.insert(F);
+ }
+
+ Changed |= addReadAttrs(SCCNodes, AARGetter);
+ Changed |= addArgumentAttrs(SCCNodes);
+
+ // If we have no external nodes participating in the SCC, we can deduce some
+ // more precise attributes as well.
+ if (!ExternalNode) {
+ Changed |= addNoAliasAttrs(SCCNodes);
+ Changed |= addNonNullAttrs(SCCNodes, *TLI);
+ }
+
+ // Functions that could not be proven norecurse here are queued in Revisit.
+ Changed |= addNoRecurseAttrs(SCC, Revisit);
+ return Changed;
+}
+
+/// Second chance for norecurse: walk the functions queued in Revisit in
+/// reverse order and apply the top-down rule to each one that still exists.
+bool FunctionAttrs::doFinalization(CallGraph &CG) {
+  bool Changed = false;
+  // SCCs are visited bottom-up, but some norecurse rules work best with a
+  // top-down walk. Iterating the revisit list backwards gives that order.
+  for (auto &Handle : reverse(Revisit)) {
+    // The weak handle converts to a null Value* if the function went away.
+    Value *Tracked = Handle;
+    if (!Tracked)
+      continue;
+    Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>(Tracked));
+  }
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
new file mode 100644
index 0000000..d8b677b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -0,0 +1,433 @@
+//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Function import based on summaries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/FunctionImport.h"
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Object/FunctionIndexObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/SourceMgr.h"
+
+#include <map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "function-import"
+
+/// Limit on instruction count of imported functions.
+/// NOTE(review): GetImportList skips a function only when its instruction
+/// count is strictly greater than this value, so a function with exactly N
+/// instructions is still imported despite the "less than N" wording below.
+static cl::opt<unsigned> ImportInstrLimit(
+ "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
+ cl::desc("Only import functions with less than N instructions"));
+
+// Lazily load the module stored in \p FileName into \p Context.
+//
+// On failure the parser diagnostic is printed to stderr and nullptr is
+// returned. On success the module's metadata is materialized and its debug
+// info upgraded before the module is handed back.
+static std::unique_ptr<Module> loadFile(const std::string &FileName,
+                                        LLVMContext &Context) {
+  DEBUG(dbgs() << "Loading '" << FileName << "'\n");
+
+  SMDiagnostic Diag;
+  std::unique_ptr<Module> Loaded =
+      getLazyIRFileModule(FileName, Diag, Context);
+  if (Loaded) {
+    Loaded->materializeMetadata();
+    UpgradeDebugInfo(*Loaded);
+    return Loaded;
+  }
+
+  Diag.print("function-import", errs());
+  return nullptr;
+}
+
+namespace {
+/// Loads Modules on demand through a user-supplied callback and caches them
+/// for subsequent queries. It can be used with the FunctionImporter.
+class ModuleLazyLoaderCache {
+  /// Modules loaded so far, keyed by the identifier they were requested with.
+  StringMap<std::unique_ptr<Module>> ModuleMap;
+
+  /// Callback invoked on a cache miss to create the Module for an identifier.
+  std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule;
+
+public:
+  /// Create the cache; \p createLazyModule is called whenever a requested
+  /// Module is not cached yet.
+  ModuleLazyLoaderCache(std::function<
+      std::unique_ptr<Module>(StringRef FileName)> createLazyModule)
+      : createLazyModule(createLazyModule) {}
+
+  /// Retrieve a Module from the cache or lazily load it on demand.
+  Module &operator()(StringRef FileName);
+
+  /// Remove the Module cached under \p FileName and transfer its ownership to
+  /// the caller. The Module must already be in the cache (asserted).
+  std::unique_ptr<Module> takeModule(StringRef FileName) {
+    auto Entry = ModuleMap.find(FileName);
+    assert(Entry != ModuleMap.end());
+    std::unique_ptr<Module> Taken = std::move(Entry->second);
+    ModuleMap.erase(Entry);
+    return Taken;
+  }
+};
+
+// Return the Module cached under \p Identifier, creating it through the
+// callback first if the cache slot is still empty.
+Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) {
+  std::unique_ptr<Module> &Slot = ModuleMap[Identifier];
+  if (!Slot)
+    Slot = createLazyModule(Identifier);
+  return *Slot;
+}
+} // anonymous namespace
+
+/// Scan the call instructions of \p F for named, non-intrinsic callees that
+/// are not yet recorded in \p CalledFunctions. Each new callee that is not
+/// already defined in \p DestModule is pushed onto \p Worklist as an import
+/// candidate.
+static void findExternalCalls(const Module &DestModule, Function &F,
+                              const FunctionInfoIndex &Index,
+                              StringSet<> &CalledFunctions,
+                              SmallVector<StringRef, 64> &Worklist) {
+  // Internal functions called from a function living in another module are
+  // referenced under a suffixed name; compute that suffix once up front.
+  std::string Suffix;
+  if (F.getParent() != &DestModule)
+    Suffix =
+        (Twine(".llvm.") +
+         Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str();
+
+  for (auto &Block : F) {
+    for (auto &Inst : Block) {
+      auto *Call = dyn_cast<CallInst>(&Inst);
+      if (!Call)
+        continue;
+
+      // Only direct calls to named functions are of interest.
+      auto *Callee = Call->getCalledFunction();
+      if (!Callee || !Callee->hasName())
+        continue;
+
+      // Ignore intrinsics early
+      if (Callee->isIntrinsic()) {
+        assert(Callee->getIntrinsicID() != 0);
+        continue;
+      }
+
+      // Internal callees are looked up under their renamed (suffixed) name.
+      // Renamed must outlive ImportedName, which may point into it.
+      std::string Renamed;
+      StringRef ImportedName = Callee->getName();
+      if (Callee->hasInternalLinkage()) {
+        Renamed = (ImportedName + Suffix).str();
+        ImportedName = Renamed;
+      }
+
+      // Skip callees that were already considered.
+      auto Inserted = CalledFunctions.insert(ImportedName);
+      if (!Inserted.second)
+        continue;
+
+      // Ignore functions already defined in the destination module
+      if (auto *ExistingGV = DestModule.getNamedValue(ImportedName)) {
+        assert(isa<Function>(ExistingGV) && "Name collision during import");
+        if (!cast<Function>(ExistingGV)->isDeclaration()) {
+          DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
+                       << ImportedName << " already in DestinationModule\n");
+          continue;
+        }
+      }
+
+      // Queue the key owned by the set, not the local name.
+      Worklist.push_back(Inserted.first->getKey());
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Adding callee for : " << ImportedName << " : "
+                   << F.getName() << "\n");
+    }
+  }
+}
+
+// Helper function: given a worklist and an index, will process all the worklist
+// and decide what to import based on the summary information.
+//
+// Nothing is actually imported, functions are materialized in their source
+// module and analyzed there.
+//
+// \p ModuleToFunctionsToImportMap is filled with the set of Function to import
+// per Module. \p Worklist is drained; callees discovered in selected functions
+// are appended to it (and recorded in \p CalledFunctions) while processing.
+static void GetImportList(Module &DestModule,
+                          SmallVector<StringRef, 64> &Worklist,
+                          StringSet<> &CalledFunctions,
+                          std::map<StringRef, DenseSet<const GlobalValue *>>
+                              &ModuleToFunctionsToImportMap,
+                          const FunctionInfoIndex &Index,
+                          ModuleLazyLoaderCache &ModuleLoaderCache) {
+  while (!Worklist.empty()) {
+    auto CalledFunctionName = Worklist.pop_back_val();
+    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
+                 << CalledFunctionName << "\n");
+
+    // Try to get a summary for this function call.
+    auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
+    if (InfoList == Index.end()) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
+                   << CalledFunctionName << " Ignoring.\n");
+      continue;
+    }
+    assert(!InfoList->second.empty() && "No summary, error at import?");
+
+    // Comdat can have multiple entries, FIXME: what do we do with them?
+    auto &Info = InfoList->second[0];
+    assert(Info && "Nullptr in list, error importing summaries?\n");
+
+    auto *Summary = Info->functionSummary();
+    if (!Summary) {
+      // FIXME: in case we are lazyloading summaries, we can do it now.
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Missing summary for " << CalledFunctionName
+                   << ", error at import?\n");
+      llvm_unreachable("Missing summary");
+    }
+
+    // Functions strictly larger than the limit are not imported.
+    if (Summary->instCount() > ImportInstrLimit) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
+                   << CalledFunctionName << " with " << Summary->instCount()
+                   << " instructions (limit " << ImportInstrLimit << ")\n");
+      continue;
+    }
+
+    // Get the module path from the summary.
+    auto ModuleIdentifier = Summary->modulePath();
+    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
+                 << CalledFunctionName << " from " << ModuleIdentifier << "\n");
+
+    auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
+
+    // The function that we will import!
+    GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
+
+    if (!SGV) {
+      // The destination module is referencing function using their renamed name
+      // when importing a function that was originally local in the source
+      // module. The source module we have might not have been renamed so we try
+      // to remove the suffix added during the renaming to recover the original
+      // name in the source module.
+      std::pair<StringRef, StringRef> Split =
+          CalledFunctionName.split(".llvm.");
+      SGV = SrcModule.getNamedValue(Split.first);
+      assert(SGV && "Can't find function to import in source module");
+    }
+    if (!SGV) {
+      report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
+                         "' in Module '" + SrcModule.getModuleIdentifier() +
+                         "', error in the summary?\n");
+    }
+
+    // Resolve the symbol to a Function, looking through an alias if needed.
+    // The name is only replaced once the alias target is known to be a
+    // Function, so a null target is never dereferenced.
+    Function *F = dyn_cast<Function>(SGV);
+    if (!F) {
+      if (auto *SGA = dyn_cast<GlobalAlias>(SGV))
+        F = dyn_cast_or_null<Function>(SGA->getBaseObject());
+      if (F)
+        CalledFunctionName = F->getName();
+    }
+    assert(F && "Imported Function is ... not a Function");
+    if (!F) {
+      // Same policy as for a missing symbol above: fail loudly in builds
+      // without assertions instead of dereferencing a null pointer below.
+      report_fatal_error(Twine("Symbol '") + CalledFunctionName +
+                         "' in Module '" + SrcModule.getModuleIdentifier() +
+                         "' is not a Function, error in the summary?\n");
+    }
+
+    // We cannot import weak_any functions/aliases without possibly affecting
+    // the order they are seen and selected by the linker, changing program
+    // semantics.
+    if (SGV->hasWeakAnyLinkage()) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Ignoring import request for weak-any "
+                   << (isa<Function>(SGV) ? "function " : "alias ")
+                   << CalledFunctionName << " from "
+                   << SrcModule.getModuleIdentifier() << "\n");
+      continue;
+    }
+
+    // Add the function to the import list
+    auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()];
+    Entry.insert(F);
+
+    // Process the newly imported functions and add callees to the worklist.
+    F->materialize();
+    findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist);
+  }
+}
+
+// Automatically import functions in Module \p DestModule based on the summaries
+// index.
+//
+// The current implementation imports every called functions that exists in the
+// summaries index.
+//
+// Returns true if at least one function was linked into \p DestModule, i.e. if
+// the module was modified.
+bool FunctionImporter::importFunctions(Module &DestModule) {
+  DEBUG(dbgs() << "Starting import for Module "
+               << DestModule.getModuleIdentifier() << "\n");
+  unsigned ImportedCount = 0;
+
+  /// First step is collecting the called external functions.
+  StringSet<> CalledFunctions;
+  SmallVector<StringRef, 64> Worklist;
+  for (auto &F : DestModule) {
+    if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone))
+      continue;
+    findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist);
+  }
+  if (Worklist.empty())
+    return false;
+
+  /// Second step: for every call to an external function, try to import it.
+
+  // Linker that will be used for importing function
+  Linker TheLinker(DestModule);
+
+  // Map of Module -> List of Function to import from the Module
+  std::map<StringRef, DenseSet<const GlobalValue *>>
+      ModuleToFunctionsToImportMap;
+
+  // Analyze the summaries and get the list of functions to import by
+  // populating ModuleToFunctionsToImportMap
+  ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
+  GetImportList(DestModule, Worklist, CalledFunctions,
+                ModuleToFunctionsToImportMap, Index, ModuleLoaderCache);
+  assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList");
+
+  StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>>
+      ModuleToTempMDValsMap;
+
+  // Do the actual import of functions now, one Module at a time
+  for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) {
+    // Get the module for the import
+    auto &FunctionsToImport = FunctionsToImportPerModule.second;
+    std::unique_ptr<Module> SrcModule =
+        ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first);
+    assert(&DestModule.getContext() == &SrcModule->getContext() &&
+           "Context mismatch");
+
+    // Save the mapping of value ids to temporary metadata created when
+    // importing this function. If we have already imported from this module,
+    // add new temporary metadata to the existing mapping.
+    auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()];
+    if (!TempMDVals)
+      TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>();
+
+    // Link in the specified functions.
+    if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
+                               &Index, &FunctionsToImport, TempMDVals.get()))
+      report_fatal_error("Function Import: link error");
+
+    ImportedCount += FunctionsToImport.size();
+  }
+
+  // Now link in metadata for all modules from which we imported functions.
+  for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME :
+       ModuleToTempMDValsMap) {
+    // Load the specified source module. It was taken out of the cache above,
+    // so this loads it again.
+    auto &SrcModule = ModuleLoaderCache(SME.getKey());
+
+    // Link in all necessary metadata from this module. On failure stop early,
+    // but still report the functions already linked into DestModule:
+    // returning false here would claim the module is unchanged.
+    if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
+      return ImportedCount != 0;
+  }
+
+  DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
+               << DestModule.getModuleIdentifier() << "\n");
+  return ImportedCount != 0;
+}
+
+/// Summary file to use for function importing when using -function-import from
+/// the command line.
+static cl::opt<std::string>
+ SummaryFile("summary-file",
+ cl::desc("The summary file to use for function importing."));
+
+/// Diagnostic callback: print \p DI to stderr followed by a newline.
+static void diagnosticHandler(const DiagnosticInfo &DI) {
+  DiagnosticPrinterRawOStream Printer(errs());
+  DI.print(Printer);
+  errs() << '\n';
+}
+
+/// Read the file at \p Path and extract its function index.
+///
+/// \returns the index on success. On failure returns nullptr and stores the
+/// error message (from reading the file or from creating the index object)
+/// in \p Error.
+static std::unique_ptr<FunctionInfoIndex>
+getFunctionIndexForFile(StringRef Path, std::string &Error,
+                        DiagnosticHandlerFunction DiagnosticHandler) {
+  // Step 1: pull the file contents into memory.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+      MemoryBuffer::getFile(Path);
+  if (std::error_code ReadEC = FileOrErr.getError()) {
+    Error = ReadEC.message();
+    return nullptr;
+  }
+  std::unique_ptr<MemoryBuffer> Contents = std::move(FileOrErr.get());
+
+  // Step 2: build the index object file on top of the buffer.
+  ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> IndexObjOrErr =
+      object::FunctionIndexObjectFile::create(Contents->getMemBufferRef(),
+                                              DiagnosticHandler);
+  if (std::error_code ParseEC = IndexObjOrErr.getError()) {
+    Error = ParseEC.message();
+    return nullptr;
+  }
+
+  // Step 3: hand the index over to the caller.
+  return (*IndexObjOrErr)->takeIndex();
+}
+
+namespace {
+/// Pass that performs cross-module function import provided a summary file.
+class FunctionImportPass : public ModulePass {
+  /// Optional function summary index to use for importing, otherwise
+  /// the summary-file option must be specified. Never owned by the pass.
+  const FunctionInfoIndex *Index;
+
+public:
+  /// Pass identification, replacement for typeid
+  static char ID;
+
+  /// Specify pass name for debug output
+  const char *getPassName() const override {
+    return "Function Importing";
+  }
+
+  explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr)
+      : ModulePass(ID), Index(Index) {}
+
+  /// Import functions into \p M using either the index given at construction
+  /// or the one loaded from -summary-file (exactly one must be provided).
+  ///
+  /// \returns the result of FunctionImporter::importFunctions, or false if the
+  /// summary file could not be loaded.
+  bool runOnModule(Module &M) override {
+    if (SummaryFile.empty() && !Index)
+      report_fatal_error("error: -function-import requires -summary-file or "
+                         "file from frontend\n");
+
+    // Index used for this run. It is kept in a local rather than written back
+    // to the member: an index loaded from the summary file is owned by
+    // IndexPtr and dies with this call, so storing it in the member would
+    // leave a dangling pointer (and trip the check below on a later run).
+    const FunctionInfoIndex *ImportIndex = Index;
+    std::unique_ptr<FunctionInfoIndex> IndexPtr;
+    if (!SummaryFile.empty()) {
+      if (Index)
+        report_fatal_error("error: -summary-file and index from frontend\n");
+      std::string Error;
+      IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler);
+      if (!IndexPtr) {
+        errs() << "Error loading file '" << SummaryFile << "': " << Error
+               << "\n";
+        return false;
+      }
+      ImportIndex = IndexPtr.get();
+    }
+
+    // Perform the import now. Source modules are loaded lazily into M's
+    // context.
+    auto ModuleLoader = [&M](StringRef Identifier) {
+      return loadFile(Identifier, M.getContext());
+    };
+    FunctionImporter Importer(*ImportIndex, ModuleLoader);
+    return Importer.importFunctions(M);
+  }
+};
+} // anonymous namespace
+
+char FunctionImportPass::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
+ "Summary Based Function Import", false, false)
+INITIALIZE_PASS_END(FunctionImportPass, "function-import",
+ "Summary Based Function Import", false, false)
+
+namespace llvm {
+/// Create a FunctionImportPass. \p Index is the optional summary index to
+/// import from; when it is null the pass requires the -summary-file option.
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
+ return new FunctionImportPass(Index);
+}
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 61d0ff9..9b276ed 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -92,33 +92,28 @@ bool GlobalDCE::runOnModule(Module &M) {
ComdatMembers.insert(std::make_pair(C, &GA));
// Loop over the module, adding globals which are obviously necessary.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Changed |= RemoveUnusedGlobalValue(*I);
+ for (Function &F : M) {
+ Changed |= RemoveUnusedGlobalValue(F);
// Functions with external linkage are needed if they have a body
- if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
- if (!I->isDiscardableIfUnused())
- GlobalIsNeeded(I);
- }
+ if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage())
+ if (!F.isDiscardableIfUnused())
+ GlobalIsNeeded(&F);
}
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- Changed |= RemoveUnusedGlobalValue(*I);
+ for (GlobalVariable &GV : M.globals()) {
+ Changed |= RemoveUnusedGlobalValue(GV);
// Externally visible & appending globals are needed, if they have an
// initializer.
- if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
- if (!I->isDiscardableIfUnused())
- GlobalIsNeeded(I);
- }
+ if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage())
+ if (!GV.isDiscardableIfUnused())
+ GlobalIsNeeded(&GV);
}
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I) {
- Changed |= RemoveUnusedGlobalValue(*I);
+ for (GlobalAlias &GA : M.aliases()) {
+ Changed |= RemoveUnusedGlobalValue(GA);
// Externally visible aliases are needed.
- if (!I->isDiscardableIfUnused()) {
- GlobalIsNeeded(I);
- }
+ if (!GA.isDiscardableIfUnused())
+ GlobalIsNeeded(&GA);
}
// Now that all globals which are needed are in the AliveGlobals set, we loop
@@ -126,52 +121,50 @@ bool GlobalDCE::runOnModule(Module &M) {
//
// The first pass is to drop initializers of global variables which are dead.
- std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!AliveGlobals.count(I)) {
- DeadGlobalVars.push_back(I); // Keep track of dead globals
- if (I->hasInitializer()) {
- Constant *Init = I->getInitializer();
- I->setInitializer(nullptr);
+ std::vector<GlobalVariable *> DeadGlobalVars; // Keep track of dead globals
+ for (GlobalVariable &GV : M.globals())
+ if (!AliveGlobals.count(&GV)) {
+ DeadGlobalVars.push_back(&GV); // Keep track of dead globals
+ if (GV.hasInitializer()) {
+ Constant *Init = GV.getInitializer();
+ GV.setInitializer(nullptr);
if (isSafeToDestroyConstant(Init))
Init->destroyConstant();
}
}
// The second pass drops the bodies of functions which are dead...
- std::vector<Function*> DeadFunctions;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!AliveGlobals.count(I)) {
- DeadFunctions.push_back(I); // Keep track of dead globals
- if (!I->isDeclaration())
- I->deleteBody();
+ std::vector<Function *> DeadFunctions;
+ for (Function &F : M)
+ if (!AliveGlobals.count(&F)) {
+ DeadFunctions.push_back(&F); // Keep track of dead globals
+ if (!F.isDeclaration())
+ F.deleteBody();
}
// The third pass drops targets of aliases which are dead...
std::vector<GlobalAlias*> DeadAliases;
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;
- ++I)
- if (!AliveGlobals.count(I)) {
- DeadAliases.push_back(I);
- I->setAliasee(nullptr);
+ for (GlobalAlias &GA : M.aliases())
+ if (!AliveGlobals.count(&GA)) {
+ DeadAliases.push_back(&GA);
+ GA.setAliasee(nullptr);
}
if (!DeadFunctions.empty()) {
// Now that all interferences have been dropped, delete the actual objects
// themselves.
- for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) {
- RemoveUnusedGlobalValue(*DeadFunctions[i]);
- M.getFunctionList().erase(DeadFunctions[i]);
+ for (Function *F : DeadFunctions) {
+ RemoveUnusedGlobalValue(*F);
+ M.getFunctionList().erase(F);
}
NumFunctions += DeadFunctions.size();
Changed = true;
}
if (!DeadGlobalVars.empty()) {
- for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) {
- RemoveUnusedGlobalValue(*DeadGlobalVars[i]);
- M.getGlobalList().erase(DeadGlobalVars[i]);
+ for (GlobalVariable *GV : DeadGlobalVars) {
+ RemoveUnusedGlobalValue(*GV);
+ M.getGlobalList().erase(GV);
}
NumVariables += DeadGlobalVars.size();
Changed = true;
@@ -179,9 +172,9 @@ bool GlobalDCE::runOnModule(Module &M) {
// Now delete any dead aliases.
if (!DeadAliases.empty()) {
- for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) {
- RemoveUnusedGlobalValue(*DeadAliases[i]);
- M.getAliasList().erase(DeadAliases[i]);
+ for (GlobalAlias *GA : DeadAliases) {
+ RemoveUnusedGlobalValue(*GA);
+ M.getAliasList().erase(GA);
}
NumAliases += DeadAliases.size();
Changed = true;
@@ -222,21 +215,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
// any globals used will be marked as needed.
Function *F = cast<Function>(G);
- if (F->hasPrefixData())
- MarkUsedGlobalsAsNeeded(F->getPrefixData());
-
- if (F->hasPrologueData())
- MarkUsedGlobalsAsNeeded(F->getPrologueData());
+ for (Use &U : F->operands())
+ MarkUsedGlobalsAsNeeded(cast<Constant>(U.get()));
- if (F->hasPersonalityFn())
- MarkUsedGlobalsAsNeeded(F->getPersonalityFn());
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
- if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
+ for (BasicBlock &BB : *F)
+ for (Instruction &I : BB)
+ for (Use &U : I.operands())
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U))
GlobalIsNeeded(GV);
- else if (Constant *C = dyn_cast<Constant>(*U))
+ else if (Constant *C = dyn_cast<Constant>(U))
MarkUsedGlobalsAsNeeded(C);
}
}
@@ -247,9 +234,9 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
// Loop over all of the operands of the constant, adding any globals they
// use to the list of needed globals.
- for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
+ for (Use &U : C->operands()) {
// If we've already processed this constant there's no need to do it again.
- Constant *Op = dyn_cast<Constant>(*I);
+ Constant *Op = dyn_cast<Constant>(U);
if (Op && SeenConstants.insert(Op).second)
MarkUsedGlobalsAsNeeded(Op);
}
@@ -262,7 +249,8 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
// might make it deader.
//
bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
- if (GV.use_empty()) return false;
+ if (GV.use_empty())
+ return false;
GV.removeDeadConstantUsers();
return GV.use_empty();
}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 5ffe15d..fd77369 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -54,7 +55,6 @@ STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted , "Number of globals deleted");
-STATISTIC(NumFnDeleted , "Number of functions deleted");
STATISTIC(NumGlobUses , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
@@ -69,6 +69,7 @@ namespace {
struct GlobalOpt : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -81,11 +82,14 @@ namespace {
bool OptimizeFunctions(Module &M);
bool OptimizeGlobalVars(Module &M);
bool OptimizeGlobalAliases(Module &M);
- bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
- bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
- const GlobalStatus &GS);
+ bool deleteIfDead(GlobalValue &GV);
+ bool processGlobal(GlobalValue &GV);
+ bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
+ bool isPointerValueDeadOnEntryToFunction(const Function *F,
+ GlobalValue *GV);
+
TargetLibraryInfo *TLI;
SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
@@ -95,13 +99,14 @@ char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
-/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
-/// as a root? If so, we might not really want to eliminate the stores to it.
+/// Is this global variable possibly used by a leak checker as a root? If so,
+/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
// A global variable is a root if it is a pointer, or could plausibly contain
// a pointer. There are two challenges; one is that we could have a struct
@@ -176,10 +181,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
} while (1);
}
-/// CleanupPointerRootUsers - This GV is a pointer root. Loop over all users
-/// of the global and clean up any that obviously don't assign the global a
-/// value that isn't dynamically allocated.
-///
+/// This GV is a pointer root. Loop over all users of the global and clean up
+/// any that obviously don't assign the global a value that isn't dynamically
+/// allocated.
static bool CleanupPointerRootUsers(GlobalVariable *GV,
const TargetLibraryInfo *TLI) {
// A brief explanation of leak checkers. The goal is to find bugs where
@@ -263,10 +267,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
return Changed;
}
-/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
-/// users of the global, cleaning up the obvious ones. This is largely just a
-/// quick scan over the use list to clean up the easy and obvious cruft. This
-/// returns true if it made a change.
+/// We just marked GV constant. Loop over all users of the global, cleaning up
+/// the obvious ones. This is largely just a quick scan over the use list to
+/// clean up the easy and obvious cruft. This returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
@@ -353,8 +356,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
return Changed;
}
-/// isSafeSROAElementUse - Return true if the specified instruction is a safe
-/// user of a derived expression from a global that we want to SROA.
+/// Return true if the specified instruction is a safe user of a derived
+/// expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
// We might have a dead and dangling constant hanging off of here.
if (Constant *C = dyn_cast<Constant>(V))
@@ -385,9 +388,8 @@ static bool isSafeSROAElementUse(Value *V) {
}
-/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
-/// Look at it and its uses and decide whether it is safe to SROA this global.
-///
+/// U is a direct user of the specified global value. Look at it and its uses
+/// and decide whether it is safe to SROA this global.
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
// The user of the global must be a GEP Inst or a ConstantExpr GEP.
if (!isa<GetElementPtrInst>(U) &&
@@ -452,9 +454,8 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
return true;
}
-/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
-/// is safe for us to perform this transformation.
-///
+/// Look at all uses of the global and decide whether it is safe for us to
+/// perform this transformation.
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
for (User *U : GV->users())
if (!IsUserOfGlobalSafeForSRA(U, GV))
@@ -464,10 +465,10 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
}
-/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
-/// variable. This opens the door for other optimizations by exposing the
-/// behavior of the program in a more fine-grained way. We have determined that
-/// this transformation is safe already. We return the first global variable we
+/// Perform scalar replacement of aggregates on the specified global variable.
+/// This opens the door for other optimizations by exposing the behavior of the
+/// program in a more fine-grained way. We have determined that this
+/// transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Make sure this global only has simple uses that we can SRA.
@@ -497,7 +498,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
In, GV->getName()+"."+Twine(i),
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
- Globals.insert(GV, NGV);
+ NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ Globals.push_back(NGV);
NewGlobals.push_back(NGV);
// Calculate the known alignment of the field. If the original aggregate
@@ -530,7 +532,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
In, GV->getName()+"."+Twine(i),
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
- Globals.insert(GV, NGV);
+ NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ Globals.push_back(NGV);
NewGlobals.push_back(NGV);
// Calculate the known alignment of the field. If the original aggregate
@@ -545,7 +548,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (NewGlobals.empty())
return nullptr;
- DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
+ DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -610,9 +613,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
}
-/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
-/// value will trap if the value is dynamically null. PHIs keeps track of any
-/// phi nodes we've seen to avoid reprocessing them.
+/// Return true if all users of the specified value will trap if the value is
+/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
+/// reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
SmallPtrSetImpl<const PHINode*> &PHIs) {
for (const User *U : V->users())
@@ -653,9 +656,9 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
return true;
}
-/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
-/// from GV will trap if the loaded value is null. Note that this also permits
-/// comparisons of the loaded value against null, as a special case.
+/// Return true if all uses of any loads from GV will trap if the loaded value
+/// is null. Note that this also permits comparisons of the loaded value
+/// against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
for (const User *U : GV->users())
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
@@ -735,10 +738,10 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
}
-/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
-/// value stored into it. If there are uses of the loaded value that would trap
-/// if the loaded value is dynamically null, then we know that they cannot be
-/// reachable with a null optimize away the load.
+/// The specified global has only one non-null value stored into it. If there
+/// are uses of the loaded value that would trap if the loaded value is
+/// dynamically null, then we know that they cannot be reached with a null
+/// value, so we can optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
@@ -778,7 +781,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
}
if (Changed) {
- DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
+ DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n");
++NumGlobUses;
}
@@ -801,8 +804,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
return Changed;
}
-/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
-/// instructions that are foldable.
+/// Walk the use list of V, constant folding all of the instructions that are
+/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
TargetLibraryInfo *TLI) {
for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
@@ -818,11 +821,11 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
}
}
-/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
-/// variable, and transforms the program as if it always contained the result of
-/// the specified malloc. Because it is always the result of the specified
-/// malloc, there is no reason to actually DO the malloc. Instead, turn the
-/// malloc into a global, and any loads of GV as uses of the new global.
+/// This function takes the specified global variable, and transforms the
+/// program as if it always contained the result of the specified malloc.
+/// Because it is always the result of the specified malloc, there is no reason
+/// to actually DO the malloc. Instead, turn the malloc into a global, and any
+/// loads of GV as uses of the new global.
static GlobalVariable *
OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
ConstantInt *NElements, const DataLayout &DL,
@@ -838,13 +841,10 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// Create the new global variable. The contents of the malloc'd memory is
// undefined, so initialize with an undef value.
- GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
- GlobalType, false,
- GlobalValue::InternalLinkage,
- UndefValue::get(GlobalType),
- GV->getName()+".body",
- GV,
- GV->getThreadLocalMode());
+ GlobalVariable *NewGV = new GlobalVariable(
+ *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
+ UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
+ GV->getThreadLocalMode());
// If there are bitcast users of the malloc (which is typical, usually we have
// a malloc + bitcast) then replace them with uses of the new global. Update
@@ -935,7 +935,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
cast<StoreInst>(InitBool->user_back())->eraseFromParent();
delete InitBool;
} else
- GV->getParent()->getGlobalList().insert(GV, InitBool);
+ GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
// Now the GV is dead, nuke it and the malloc..
GV->eraseFromParent();
@@ -951,10 +951,9 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
return NewGV;
}
-/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
-/// to make sure that there are no complex uses of V. We permit simple things
-/// like dereferencing the pointer, but not storing through the address, unless
-/// it is to the specified global.
+/// Scan the use-list of V checking to make sure that there are no complex uses
+/// of V. We permit simple things like dereferencing the pointer, but not
+/// storing through the address, unless it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
const GlobalVariable *GV,
SmallPtrSetImpl<const PHINode*> &PHIs) {
@@ -998,10 +997,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
return true;
}
-/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
-/// somewhere. Transform all uses of the allocation into loads from the
-/// global and uses of the resultant pointer. Further, delete the store into
-/// GV. This assumes that these value pass the
+/// The Alloc pointer is stored into GV somewhere. Transform all uses of the
+/// allocation into loads from the global and uses of the resultant pointer.
+/// Further, delete the store into GV. This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
GlobalVariable *GV) {
@@ -1043,9 +1041,9 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
}
}
-/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
-/// of a load) are simple enough to perform heap SRA on. This permits GEP's
-/// that index through the array and struct field, icmps of null, and PHIs.
+/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
+/// perform heap SRA on. This permits GEP's that index through the array and
+/// struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
@@ -1096,8 +1094,8 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
}
-/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
-/// GV are simple enough to perform HeapSRA, return true.
+/// If all users of values loaded from GV are simple enough to perform HeapSRA,
+/// return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
Instruction *StoredVal) {
SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
@@ -1186,8 +1184,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
return FieldVals[FieldNo] = Result;
}
-/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
-/// the load, rewrite the derived value to use the HeapSRoA'd load.
+/// Given a load instruction and a value derived from the load, rewrite the
+/// derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
@@ -1248,10 +1246,9 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
}
}
-/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr
-/// is a value loaded from the global. Eliminate all uses of Ptr, making them
-/// use FieldGlobals instead. All uses of loaded values satisfy
-/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
+/// We are performing Heap SRoA on a global. Ptr is a value loaded from the
+/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead.
+/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
@@ -1266,8 +1263,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
}
}
-/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
-/// it up into multiple allocations of arrays of the fields.
+/// CI is an allocation of an array of structures. Break it up into multiple
+/// allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Value *NElems, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
@@ -1291,12 +1288,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Type *FieldTy = STy->getElementType(FieldNo);
PointerType *PFieldTy = PointerType::get(FieldTy, AS);
- GlobalVariable *NGV =
- new GlobalVariable(*GV->getParent(),
- PFieldTy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(PFieldTy),
- GV->getName() + ".f" + Twine(FieldNo), GV,
- GV->getThreadLocalMode());
+ GlobalVariable *NGV = new GlobalVariable(
+ *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
+ nullptr, GV->getThreadLocalMode());
FieldGlobals.push_back(NGV);
unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
@@ -1336,7 +1331,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Split the basic block at the old malloc.
BasicBlock *OrigBB = CI->getParent();
- BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
+ BasicBlock *ContBB =
+ OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
@@ -1376,9 +1372,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// CI is no longer needed, remove it.
CI->eraseFromParent();
- /// InsertedScalarizedLoads - As we process loads, if we can't immediately
- /// update all uses of the load, keep track of what scalarized loads are
- /// inserted for a given load.
+ /// As we process loads, if we can't immediately update all uses of the load,
+ /// keep track of what scalarized loads are inserted for a given load.
DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
InsertedScalarizedValues[GV] = FieldGlobals;
@@ -1454,13 +1449,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
return cast<GlobalVariable>(FieldGlobals[0]);
}
-/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
-/// pointer global variable with a single value stored it that is a malloc or
-/// cast of malloc.
-static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
+/// This function is called when we see a pointer global variable with a single
+/// value stored to it that is a malloc or cast of malloc.
+static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
Type *AllocTy,
AtomicOrdering Ordering,
- Module::global_iterator &GVI,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
// If this is a malloc of an abstract type, don't touch it.
@@ -1499,7 +1492,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
+ OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
return true;
}
@@ -1544,19 +1537,18 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
CI = cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true),
- DL, TLI);
+ PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
+ TLI);
return true;
}
return false;
}
-// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
-// that only one value (besides its initializer) is ever stored to the global.
-static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+// Try to optimize globals based on the knowledge that only one value (besides
+// its initializer) is ever stored to the global.
+static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
AtomicOrdering Ordering,
- Module::global_iterator &GVI,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
@@ -1577,9 +1569,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
Type *MallocType = getMallocAllocatedType(CI, TLI);
- if (MallocType &&
- TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
- DL, TLI))
+ if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ Ordering, DL, TLI))
return true;
}
}
@@ -1587,10 +1578,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
return false;
}
-/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
-/// two values ever stored into GV are its initializer and OtherVal. See if we
-/// can shrink the global into a boolean and select between the two values
-/// whenever it is used. This exposes the values to other scalar optimizations.
+/// At this point, we have learned that the only two values ever stored into GV
+/// are its initializer and OtherVal. See if we can shrink the global into a
+/// boolean and select between the two values whenever it is used. This exposes
+/// the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
Type *GVElType = GV->getType()->getElementType();
@@ -1610,7 +1601,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
return false;
- DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
+ DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");
// Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
@@ -1620,7 +1611,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
GV->getName()+".b",
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
- GV->getParent()->getGlobalList().insert(GV, NewGV);
+ GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
Constant *InitVal = GV->getInitializer();
assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
@@ -1688,61 +1679,213 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
return true;
}
+bool GlobalOpt::deleteIfDead(GlobalValue &GV) {
+ GV.removeDeadConstantUsers();
-/// ProcessGlobal - Analyze the specified global variable and optimize it if
-/// possible. If we make a change, return true.
-bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
- Module::global_iterator &GVI) {
- // Do more involved optimizations if the global is internal.
- GV->removeDeadConstantUsers();
+ if (!GV.isDiscardableIfUnused())
+ return false;
- if (GV->use_empty()) {
- DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
- GV->eraseFromParent();
- ++NumDeleted;
- return true;
- }
+ if (const Comdat *C = GV.getComdat())
+ if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
+ return false;
- if (!GV->hasLocalLinkage())
+ bool Dead;
+ if (auto *F = dyn_cast<Function>(&GV))
+ Dead = F->isDefTriviallyDead();
+ else
+ Dead = GV.use_empty();
+ if (!Dead)
+ return false;
+
+ DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
+ GV.eraseFromParent();
+ ++NumDeleted;
+ return true;
+}
+
+/// Analyze the specified global variable and optimize it if possible. If we
+/// make a change, return true.
+bool GlobalOpt::processGlobal(GlobalValue &GV) {
+ // Do more involved optimizations if the global is internal.
+ if (!GV.hasLocalLinkage())
return false;
GlobalStatus GS;
- if (GlobalStatus::analyzeGlobal(GV, GS))
+ if (GlobalStatus::analyzeGlobal(&GV, GS))
return false;
- if (!GS.IsCompared && !GV->hasUnnamedAddr()) {
- GV->setUnnamedAddr(true);
+ bool Changed = false;
+ if (!GS.IsCompared && !GV.hasUnnamedAddr()) {
+ GV.setUnnamedAddr(true);
NumUnnamed++;
+ Changed = true;
}
- if (GV->isConstant() || !GV->hasInitializer())
+ auto *GVar = dyn_cast<GlobalVariable>(&GV);
+ if (!GVar)
+ return Changed;
+
+ if (GVar->isConstant() || !GVar->hasInitializer())
+ return Changed;
+
+ return processInternalGlobal(GVar, GS) || Changed;
+}
+
+bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) {
+ // Find all uses of GV. We expect them all to be in F, and if we can't
+ // identify any of the uses we bail out.
+ //
+ // On each of these uses, identify if the memory that GV points to is
+ // used/required/live at the start of the function. If it is not, for example
+ // if the first thing the function does is store to the GV, the GV can
+ // possibly be demoted.
+ //
+ // We don't do an exhaustive search for memory operations - simply look
+ // through bitcasts as they're quite common and benign.
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ SmallVector<LoadInst *, 4> Loads;
+ SmallVector<StoreInst *, 4> Stores;
+ for (auto *U : GV->users()) {
+ if (Operator::getOpcode(U) == Instruction::BitCast) {
+ for (auto *UU : U->users()) {
+ if (auto *LI = dyn_cast<LoadInst>(UU))
+ Loads.push_back(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(UU))
+ Stores.push_back(SI);
+ else
+ return false;
+ }
+ continue;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(U);
+ if (!I)
+ return false;
+ assert(I->getParent()->getParent() == F);
+
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ Loads.push_back(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(I))
+ Stores.push_back(SI);
+ else
+ return false;
+ }
+
+ // We have identified all uses of GV into loads and stores. Now check if all
+ // of them are known not to depend on the value of the global at the function
+ // entry point. We do this by ensuring that every load is dominated by at
+ // least one store.
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F))
+ .getDomTree();
+
+ // The below check is quadratic. Check we're not going to do too many tests.
+ // FIXME: Even though this will always have worst-case quadratic time, we
+ // could put effort into minimizing the average time by putting stores that
+ // have been shown to dominate at least one load at the beginning of the
+ // Stores array, making subsequent dominance checks more likely to succeed
+ // early.
+ //
+ // The threshold here is fairly large because global->local demotion is a
+ // very powerful optimization should it fire.
+ const unsigned Threshold = 100;
+ if (Loads.size() * Stores.size() > Threshold)
return false;
- return ProcessInternalGlobal(GV, GVI, GS);
+ for (auto *L : Loads) {
+ auto *LTy = L->getType();
+ if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) {
+ auto *STy = S->getValueOperand()->getType();
+ // The load is only dominated by the store if DomTree says so
+ // and the store size of the type loaded by L is less than or equal
+ // to the store size of the value stored by S.
+ return DT.dominates(S, L) &&
+ DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
+ }))
+ return false;
+ }
+ // All loads have known dependences inside F, so the global can be localized.
+ return true;
+}
+
+/// C may have non-instruction users. Can all of those users be turned into
+/// instructions?
+static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
+ // We don't do this exhaustively. The most common pattern that we really need
+ // to care about is a constant GEP or constant bitcast - so we just look
+ // through a single ConstantExpr.
+ //
+ // The set of constants that this function returns true for must be able to be
+ // handled by makeAllConstantUsesInstructions.
+ for (auto *U : C->users()) {
+ if (isa<Instruction>(U))
+ continue;
+ if (!isa<ConstantExpr>(U))
+ // Non instruction, non-constantexpr user; cannot convert this.
+ return false;
+ for (auto *UU : U->users())
+ if (!isa<Instruction>(UU))
+ // A constantexpr used by another constant. We don't try and recurse any
+ // further but just bail out at this point.
+ return false;
+ }
+
+ return true;
+}
+
+/// C may have non-instruction users, and
+/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
+/// non-instruction users to instructions.
+static void makeAllConstantUsesInstructions(Constant *C) {
+ SmallVector<ConstantExpr*,4> Users;
+ for (auto *U : C->users()) {
+ if (isa<ConstantExpr>(U))
+ Users.push_back(cast<ConstantExpr>(U));
+ else
+ // We should never get here; allNonInstructionUsersCanBeMadeInstructions
+ // should not have returned true for C.
+ assert(
+ isa<Instruction>(U) &&
+ "Can't transform non-constantexpr non-instruction to instruction!");
+ }
+
+ SmallVector<Value*,4> UUsers;
+ for (auto *U : Users) {
+ UUsers.clear();
+ for (auto *UU : U->users())
+ UUsers.push_back(UU);
+ for (auto *UU : UUsers) {
+ Instruction *UI = cast<Instruction>(UU);
+ Instruction *NewU = U->getAsInstruction();
+ NewU->insertBefore(UI);
+ UI->replaceUsesOfWith(U, NewU);
+ }
+ U->dropAllReferences();
+ }
}
-/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
-bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
- Module::global_iterator &GVI,
+bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
const GlobalStatus &GS) {
auto &DL = GV->getParent()->getDataLayout();
- // If this is a first class global and has only one accessing function
- // and this function is main (which we know is not recursive), we replace
- // the global with a local alloca in this function.
+ // If this is a first class global and has only one accessing function and
+ // this function is non-recursive, we replace the global with a local alloca
+ // in this function.
//
// NOTE: It doesn't make sense to promote non-single-value types since we
// are just replacing static memory to stack memory.
//
// If the global is in different address space, don't bring it to stack.
if (!GS.HasMultipleAccessingFunctions &&
- GS.AccessingFunction && !GS.HasNonInstructionUser &&
+ GS.AccessingFunction &&
GV->getType()->getElementType()->isSingleValueType() &&
- GS.AccessingFunction->getName() == "main" &&
- GS.AccessingFunction->hasExternalLinkage() &&
- GV->getType()->getAddressSpace() == 0) {
- DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+ GV->getType()->getAddressSpace() == 0 &&
+ !GV->isExternallyInitialized() &&
+ allNonInstructionUsersCanBeMadeInstructions(GV) &&
+ GS.AccessingFunction->doesNotRecurse() &&
+ isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV) ) {
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
Type *ElemTy = GV->getType()->getElementType();
@@ -1752,6 +1895,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+ makeAllConstantUsesInstructions(GV);
+
GV->replaceAllUsesWith(Alloca);
GV->eraseFromParent();
++NumLocalized;
@@ -1761,7 +1906,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.IsLoaded) {
- DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
bool Changed;
if (isLeakCheckerRoot(GV)) {
@@ -1800,11 +1945,9 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
const DataLayout &DL = GV->getParent()->getDataLayout();
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
- GVI = FirstNewGV; // Don't skip the newly produced globals!
+ if (SRAGlobal(GV, DL))
return true;
- }
- } else if (GS.StoredType == GlobalStatus::StoredOnce) {
+ } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
@@ -1822,8 +1965,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
<< "simplify all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
- } else {
- GVI = GV;
}
++NumSubstitute;
return true;
@@ -1831,8 +1972,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
- DL, TLI))
+ if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
@@ -1850,8 +1990,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return false;
}
-/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
-/// function, changing them to FastCC.
+/// Walk all of the direct calls of the specified function, changing them to
+/// FastCC.
static void ChangeCalleesToFastCall(Function *F) {
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
@@ -1898,38 +2038,38 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
bool Changed = false;
// Optimize functions.
for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
- Function *F = FI++;
+ Function *F = &*FI++;
// Functions without names cannot be referenced outside this module.
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
F->setLinkage(GlobalValue::InternalLinkage);
- const Comdat *C = F->getComdat();
- bool inComdat = C && NotDiscardableComdats.count(C);
- F->removeDeadConstantUsers();
- if ((!inComdat || F->hasLocalLinkage()) && F->isDefTriviallyDead()) {
- F->eraseFromParent();
+ if (deleteIfDead(*F)) {
Changed = true;
- ++NumFnDeleted;
- } else if (F->hasLocalLinkage()) {
- if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
- !F->hasAddressTaken()) {
- // If this function has a calling convention worth changing, is not a
- // varargs function, and is only called directly, promote it to use the
- // Fast calling convention.
- F->setCallingConv(CallingConv::Fast);
- ChangeCalleesToFastCall(F);
- ++NumFastCallFns;
- Changed = true;
- }
+ continue;
+ }
- if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
- !F->hasAddressTaken()) {
- // The function is not used by a trampoline intrinsic, so it is safe
- // to remove the 'nest' attribute.
- RemoveNestAttribute(F);
- ++NumNestRemoved;
- Changed = true;
- }
+ Changed |= processGlobal(*F);
+
+ if (!F->hasLocalLinkage())
+ continue;
+ if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
+ !F->hasAddressTaken()) {
+ // If this function has a calling convention worth changing, is not a
+ // varargs function, and is only called directly, promote it to use the
+ // Fast calling convention.
+ F->setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(F);
+ ++NumFastCallFns;
+ Changed = true;
+ }
+
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F->hasAddressTaken()) {
+ // The function is not used by a trampoline intrinsic, so it is safe
+ // to remove the 'nest' attribute.
+ RemoveNestAttribute(F);
+ ++NumNestRemoved;
+ Changed = true;
}
}
return Changed;
@@ -1940,7 +2080,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
- GlobalVariable *GV = GVI++;
+ GlobalVariable *GV = &*GVI++;
// Global variables without names cannot be referenced outside this module.
if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
GV->setLinkage(GlobalValue::InternalLinkage);
@@ -1953,12 +2093,12 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
GV->setInitializer(New);
}
- if (GV->isDiscardableIfUnused()) {
- if (const Comdat *C = GV->getComdat())
- if (NotDiscardableComdats.count(C) && !GV->hasLocalLinkage())
- continue;
- Changed |= ProcessGlobal(GV, GVI);
+ if (deleteIfDead(*GV)) {
+ Changed = true;
+ continue;
}
+
+ Changed |= processGlobal(*GV);
}
return Changed;
}
@@ -1968,8 +2108,8 @@ isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSetImpl<Constant *> &SimpleConstants,
const DataLayout &DL);
-/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
-/// handled by the code generator. We don't want to generate something like:
+/// Return true if the specified constant can be handled by the code generator.
+/// We don't want to generate something like:
/// void *X = &X/42;
/// because the code generator doesn't have a relocation that can handle that.
///
@@ -2044,11 +2184,11 @@ isSimpleEnoughValueToCommit(Constant *C,
}
-/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
-/// enough for us to understand. In particular, if it is a cast to anything
-/// other than from one pointer type to another pointer type, we punt.
-/// We basically just support direct accesses to globals and GEP's of
-/// globals. This should be kept up to date with CommitValueTo.
+/// Return true if this constant is simple enough for us to understand. In
+/// particular, if it is a cast to anything other than from one pointer type to
+/// another pointer type, we punt. We basically just support direct accesses to
+/// globals and GEP's of globals. This should be kept up to date with
+/// CommitValueTo.
static bool isSimpleEnoughPointerToCommit(Constant *C) {
// Conservatively, avoid aggregate types. This is because we don't
// want to worry about them partially overlapping other stores.
@@ -2095,9 +2235,9 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
}
-/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
-/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
-/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
+/// Evaluate a piece of a constantexpr store into a global initializer. This
+/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the
+/// GEP operands of Addr [0, OpNo) have been stepped into.
static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
ConstantExpr *Addr, unsigned OpNo) {
// Base case of the recursion.
@@ -2144,7 +2284,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
return ConstantVector::get(Elts);
}
-/// CommitValueTo - We have decided that Addr (which satisfies the predicate
+/// We have decided that Addr (which satisfies the predicate
/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
static void CommitValueTo(Constant *Val, Constant *Addr) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
@@ -2160,10 +2300,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
namespace {
-/// Evaluator - This class evaluates LLVM IR, producing the Constant
-/// representing each SSA instruction. Changes to global variables are stored
-/// in a mapping that can be iterated over after the evaluation is complete.
-/// Once an evaluation call fails, the evaluation object should not be reused.
+/// This class evaluates LLVM IR, producing the Constant representing each SSA
+/// instruction. Changes to global variables are stored in a mapping that can
+/// be iterated over after the evaluation is complete. Once an evaluation call
+/// fails, the evaluation object should not be reused.
class Evaluator {
public:
Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI)
@@ -2180,15 +2320,15 @@ public:
Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
}
- /// EvaluateFunction - Evaluate a call to function F, returning true if
- /// successful, false if we can't evaluate it. ActualArgs contains the formal
- /// arguments for the function.
+ /// Evaluate a call to function F, returning true if successful, false if we
+ /// can't evaluate it. ActualArgs contains the formal arguments for the
+ /// function.
bool EvaluateFunction(Function *F, Constant *&RetVal,
const SmallVectorImpl<Constant*> &ActualArgs);
- /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
- /// successful, false if we can't evaluate it. NewBB returns the next BB that
- /// control flows into, or null upon return.
+ /// Evaluate all instructions in block BB, returning true if successful, false
+ /// if we can't evaluate it. NewBB returns the next BB that control flows
+ /// into, or null upon return.
bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
Constant *getVal(Value *V) {
@@ -2213,32 +2353,31 @@ public:
private:
Constant *ComputeLoadResult(Constant *P);
- /// ValueStack - As we compute SSA register values, we store their contents
- /// here. The back of the deque contains the current function and the stack
- /// contains the values in the calling frames.
+ /// As we compute SSA register values, we store their contents here. The back
+ /// of the deque contains the current function and the stack contains the
+ /// values in the calling frames.
std::deque<DenseMap<Value*, Constant*>> ValueStack;
- /// CallStack - This is used to detect recursion. In pathological situations
- /// we could hit exponential behavior, but at least there is nothing
- /// unbounded.
+ /// This is used to detect recursion. In pathological situations we could hit
+ /// exponential behavior, but at least there is nothing unbounded.
SmallVector<Function*, 4> CallStack;
- /// MutatedMemory - For each store we execute, we update this map. Loads
- /// check this to get the most up-to-date value. If evaluation is successful,
- /// this state is committed to the process.
+ /// For each store we execute, we update this map. Loads check this to get
+ /// the most up-to-date value. If evaluation is successful, this state is
+ /// committed to the process.
DenseMap<Constant*, Constant*> MutatedMemory;
- /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
- /// to represent its body. This vector is needed so we can delete the
- /// temporary globals when we are done.
+ /// To 'execute' an alloca, we create a temporary global variable to represent
+ /// its body. This vector is needed so we can delete the temporary globals
+ /// when we are done.
SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
- /// Invariants - These global variables have been marked invariant by the
- /// static constructor.
+ /// These global variables have been marked invariant by the static
+ /// constructor.
SmallPtrSet<GlobalVariable*, 8> Invariants;
- /// SimpleConstants - These are constants we have checked and know to be
- /// simple enough to live in a static initializer of a global.
+ /// These are constants we have checked and know to be simple enough to live
+ /// in a static initializer of a global.
SmallPtrSet<Constant*, 8> SimpleConstants;
const DataLayout &DL;
@@ -2247,9 +2386,8 @@ private:
} // anonymous namespace
-/// ComputeLoadResult - Return the value that would be computed by a load from
-/// P after the stores reflected by 'memory' have been performed. If we can't
-/// decide, return null.
+/// Return the value that would be computed by a load from P after the stores
+/// reflected by 'memory' have been performed. If we can't decide, return null.
Constant *Evaluator::ComputeLoadResult(Constant *P) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
@@ -2275,9 +2413,9 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
return nullptr; // don't know how to evaluate.
}
-/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
-/// successful, false if we can't evaluate it. NewBB returns the next BB that
-/// control flows into, or null upon return.
+/// Evaluate all instructions in block BB, returning true if successful, false
+/// if we can't evaluate it. NewBB returns the next BB that control flows into,
+/// or null upon return.
bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
BasicBlock *&NextBB) {
// This is the main evaluation loop.
@@ -2438,7 +2576,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
InstResult = AllocaTmps.back().get();
DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
} else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
- CallSite CS(CurInst);
+ CallSite CS(&*CurInst);
// Debug info can safely be ignored here.
if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
@@ -2504,6 +2642,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// Continue even if we do nothing.
++CurInst;
continue;
+ } else if (II->getIntrinsicID() == Intrinsic::assume) {
+ DEBUG(dbgs() << "Skipping assume intrinsic.\n");
+ ++CurInst;
+ continue;
}
DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
@@ -2600,7 +2742,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
InstResult = ConstantFoldConstantExpression(CE, DL, TLI);
- setVal(CurInst, InstResult);
+ setVal(&*CurInst, InstResult);
}
// If we just processed an invoke, we finished evaluating the block.
@@ -2615,9 +2757,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
}
-/// EvaluateFunction - Evaluate a call to function F, returning true if
-/// successful, false if we can't evaluate it. ActualArgs contains the formal
-/// arguments for the function.
+/// Evaluate a call to function F, returning true if successful, false if we
+/// can't evaluate it. ActualArgs contains the formal arguments for the
+/// function.
bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
const SmallVectorImpl<Constant*> &ActualArgs) {
// Check to see if this function is already executing (recursion). If so,
@@ -2631,7 +2773,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
unsigned ArgNo = 0;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
++AI, ++ArgNo)
- setVal(AI, ActualArgs[ArgNo]);
+ setVal(&*AI, ActualArgs[ArgNo]);
// ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
// we can only evaluate any one basic block at most once. This set keeps
@@ -2639,7 +2781,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
// CurBB - The current basic block we're evaluating.
- BasicBlock *CurBB = F->begin();
+ BasicBlock *CurBB = &F->front();
BasicBlock::iterator CurInst = CurBB->begin();
@@ -2679,8 +2821,8 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
}
}
-/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
-/// we can. Return true if we can, false otherwise.
+/// Evaluate static constructors in the function, if we can. Return true if we
+/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Call the function.
@@ -2708,7 +2850,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
}
static int compareNames(Constant *const *A, Constant *const *B) {
- return (*A)->getName().compare((*B)->getName());
+ return (*A)->stripPointerCasts()->getName().compare(
+ (*B)->stripPointerCasts()->getName());
}
static void setUsedInitializer(GlobalVariable &V,
@@ -2742,7 +2885,7 @@ static void setUsedInitializer(GlobalVariable &V,
}
namespace {
-/// \brief An easy to access representation of llvm.used and llvm.compiler.used.
+/// An easy to access representation of llvm.used and llvm.compiler.used.
class LLVMUsed {
SmallPtrSet<GlobalValue *, 8> Used;
SmallPtrSet<GlobalValue *, 8> CompilerUsed;
@@ -2861,10 +3004,17 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E;) {
- Module::alias_iterator J = I++;
+ GlobalAlias *J = &*I++;
+
// Aliases without names cannot be referenced outside this module.
if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
J->setLinkage(GlobalValue::InternalLinkage);
+
+ if (deleteIfDead(*J)) {
+ Changed = true;
+ continue;
+ }
+
// If the aliasee may change at link time, nothing can be done - bail out.
if (J->mayBeOverridden())
continue;
@@ -2889,15 +3039,15 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
if (RenameTarget) {
// Give the aliasee the name, linkage and other attributes of the alias.
- Target->takeName(J);
+ Target->takeName(&*J);
Target->setLinkage(J->getLinkage());
Target->setVisibility(J->getVisibility());
Target->setDLLStorageClass(J->getDLLStorageClass());
- if (Used.usedErase(J))
+ if (Used.usedErase(&*J))
Used.usedInsert(Target);
- if (Used.compilerUsedErase(J))
+ if (Used.compilerUsedErase(&*J))
Used.compilerUsedInsert(Target);
} else if (mayHaveOtherReferences(*J, Used))
continue;
@@ -2936,8 +3086,8 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
return Fn;
}
-/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
-/// destructor and can therefore be eliminated.
+/// Returns whether the given function is an empty C++ destructor and can
+/// therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we only look for a function with a single basic block, where
/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
@@ -3081,3 +3231,4 @@ bool GlobalOpt::runOnModule(Module &M) {
return Changed;
}
+
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index 50f56b0..7ea6c08 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the common infrastructure (including C bindings) for
-// libLLVMIPO.a, which implements several transformations over the LLVM
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMIPO.a, which implements several transformations over the LLVM
// intermediate representation.
//
//===----------------------------------------------------------------------===//
@@ -24,14 +24,17 @@ using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
initializeArgPromotionPass(Registry);
initializeConstantMergePass(Registry);
+ initializeCrossDSOCFIPass(Registry);
initializeDAEPass(Registry);
initializeDAHPass(Registry);
+ initializeForceFunctionAttrsLegacyPassPass(Registry);
initializeFunctionAttrsPass(Registry);
initializeGlobalDCEPass(Registry);
initializeGlobalOptPass(Registry);
initializeIPCPPass(Registry);
initializeAlwaysInlinerPass(Registry);
initializeSimpleInlinerPass(Registry);
+ initializeInferFunctionAttrsLegacyPassPass(Registry);
initializeInternalizePassPass(Registry);
initializeLoopExtractorPass(Registry);
initializeBlockExtractorPassPass(Registry);
@@ -40,13 +43,15 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
initializePruneEHPass(Registry);
- initializeStripDeadPrototypesPassPass(Registry);
+ initializeStripDeadPrototypesLegacyPassPass(Registry);
initializeStripSymbolsPass(Registry);
initializeStripDebugDeclarePass(Registry);
initializeStripDeadDebugInfoPass(Registry);
initializeStripNonDebugSymbolsPass(Registry);
initializeBarrierNoopPass(Registry);
initializeEliminateAvailableExternallyPass(Registry);
+ initializeSampleProfileLoaderPass(Registry);
+ initializeFunctionImportPassPass(Registry);
}
void LLVMInitializeIPO(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
new file mode 100644
index 0000000..d02c861
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -0,0 +1,937 @@
+//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "inferattrs"
+
+STATISTIC(NumReadNone, "Number of functions inferred as readnone");
+STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
+STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
+STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
+STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+
+/// Mark F as not accessing memory unless it is already marked so, bumping the
+/// NumReadNone statistic for each new marking. Returns true iff F was changed.
+static bool setDoesNotAccessMemory(Function &F) {
+  const bool NeedsAttr = !F.doesNotAccessMemory();
+  if (NeedsAttr) {
+    F.setDoesNotAccessMemory();
+    ++NumReadNone;
+  }
+  return NeedsAttr;
+}
+
+/// Mark F as only reading memory unless it is already marked so, bumping the
+/// NumReadOnly statistic for each new marking. Returns true iff F was changed.
+static bool setOnlyReadsMemory(Function &F) {
+  const bool NeedsAttr = !F.onlyReadsMemory();
+  if (NeedsAttr) {
+    F.setOnlyReadsMemory();
+    ++NumReadOnly;
+  }
+  return NeedsAttr;
+}
+
+/// Mark F as non-throwing unless it is already marked so, bumping the
+/// NumNoUnwind statistic for each new marking. Returns true iff F was changed.
+static bool setDoesNotThrow(Function &F) {
+  const bool NeedsAttr = !F.doesNotThrow();
+  if (NeedsAttr) {
+    F.setDoesNotThrow();
+    ++NumNoUnwind;
+  }
+  return NeedsAttr;
+}
+
+/// Mark index n of F as not captured unless it is already marked so, bumping
+/// the NumNoCapture statistic for each new marking. n is forwarded unchanged
+/// to Function::doesNotCapture / Function::setDoesNotCapture. Returns true iff
+/// F was changed.
+static bool setDoesNotCapture(Function &F, unsigned n) {
+  const bool NeedsAttr = !F.doesNotCapture(n);
+  if (NeedsAttr) {
+    F.setDoesNotCapture(n);
+    ++NumNoCapture;
+  }
+  return NeedsAttr;
+}
+
+/// Mark index n of F as only read from unless it is already marked so, bumping
+/// the NumReadOnlyArg statistic for each new marking. n is forwarded unchanged
+/// to Function::onlyReadsMemory / Function::setOnlyReadsMemory. Returns true
+/// iff F was changed.
+static bool setOnlyReadsMemory(Function &F, unsigned n) {
+  const bool NeedsAttr = !F.onlyReadsMemory(n);
+  if (NeedsAttr) {
+    F.setOnlyReadsMemory(n);
+    ++NumReadOnlyArg;
+  }
+  return NeedsAttr;
+}
+
+/// Mark index n of F as non-aliasing unless it is already marked so, bumping
+/// the NumNoAlias statistic for each new marking. n is forwarded unchanged to
+/// Function::doesNotAlias / Function::setDoesNotAlias. Returns true iff F was
+/// changed.
+static bool setDoesNotAlias(Function &F, unsigned n) {
+  const bool NeedsAttr = !F.doesNotAlias(n);
+  if (NeedsAttr) {
+    F.setDoesNotAlias(n);
+    ++NumNoAlias;
+  }
+  return NeedsAttr;
+}
+
+/// Analyze the name and prototype of the given function and set any applicable
+/// attributes.
+///
+/// Returns true if any attributes were set and false otherwise.
+static bool inferPrototypeAttributes(Function &F,
+ const TargetLibraryInfo &TLI) {
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
+ FunctionType *FTy = F.getFunctionType();
+ LibFunc::Func TheLibFunc;
+ if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc)))
+ return false;
+
+ bool Changed = false;
+
+ switch (TheLibFunc) {
+ case LibFunc::strlen:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::strchr:
+ case LibFunc::strrchr:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
+ case LibFunc::strncat:
+ case LibFunc::strncpy:
+ case LibFunc::stpncpy:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::strxfrm:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::strcmp: // 0,1
+ case LibFunc::strspn: // 0,1
+ case LibFunc::strncmp: // 0,1
+ case LibFunc::strcspn: // 0,1
+ case LibFunc::strcoll: // 0,1
+ case LibFunc::strcasecmp: // 0,1
+ case LibFunc::strncasecmp: //
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::strstr:
+ case LibFunc::strpbrk:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::strtok:
+ case LibFunc::strtok_r:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::scanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::setbuf:
+ case LibFunc::setvbuf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::strdup:
+ case LibFunc::strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::stat:
+ case LibFunc::statvfs:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::sscanf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::sprintf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::snprintf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 3);
+ return Changed;
+ case LibFunc::setitimer:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::system:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "system" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::malloc:
+ if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::memcmp:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::memchr:
+ case LibFunc::memrchr:
+ if (FTy->getNumParams() != 3)
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::modf:
+ case LibFunc::modff:
+ case LibFunc::modfl:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::memcpy:
+ case LibFunc::memccpy:
+ case LibFunc::memmove:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::memalign:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::mkdir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::mktime:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::realloc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::read:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "read" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::rewind:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::rmdir:
+ case LibFunc::remove:
+ case LibFunc::realpath:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::rename:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::readlink:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::write:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "write" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::bcopy:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::bcmp:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::bzero:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::calloc:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::chmod:
+ case LibFunc::chown:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::ctermid:
+ case LibFunc::clearerr:
+ case LibFunc::closedir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::atoi:
+ case LibFunc::atol:
+ case LibFunc::atof:
+ case LibFunc::atoll:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::access:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::fopen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fdopen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::feof:
+ case LibFunc::free:
+ case LibFunc::fseek:
+ case LibFunc::ftell:
+ case LibFunc::fgetc:
+ case LibFunc::fseeko:
+ case LibFunc::ftello:
+ case LibFunc::fileno:
+ case LibFunc::fflush:
+ case LibFunc::fclose:
+ case LibFunc::fsetpos:
+ case LibFunc::flockfile:
+ case LibFunc::funlockfile:
+ case LibFunc::ftrylockfile:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::ferror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F);
+ return Changed;
+ case LibFunc::fputc:
+ case LibFunc::fstat:
+ case LibFunc::frexp:
+ case LibFunc::frexpf:
+ case LibFunc::frexpl:
+ case LibFunc::fstatvfs:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::fgets:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 3);
+ return Changed;
+ case LibFunc::fread:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::fwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::fputs:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::fscanf:
+ case LibFunc::fprintf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fgetpos:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::getc:
+ case LibFunc::getlogin_r:
+ case LibFunc::getc_unlocked:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::getenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::gets:
+ case LibFunc::getchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::getitimer:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::getpwnam:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::ungetc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::uname:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::unlink:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::unsetenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::utime:
+ case LibFunc::utimes:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::puts:
+ case LibFunc::printf:
+ case LibFunc::perror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::pread:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "pread" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::pwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "pwrite" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::putchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::popen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::pclose:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::vscanf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::vsscanf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::vfscanf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::valloc:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::vprintf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::vfprintf:
+ case LibFunc::vsprintf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::vsnprintf:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 3);
+ return Changed;
+ case LibFunc::open:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::opendir:
+ if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::tmpfile:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::times:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::htonl:
+ case LibFunc::htons:
+ case LibFunc::ntohl:
+ case LibFunc::ntohs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAccessMemory(F);
+ return Changed;
+ case LibFunc::lstat:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::lchown:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::qsort:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return false;
+ // May throw; places call through function pointer.
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::dunder_strdup:
+ case LibFunc::dunder_strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::dunder_strtok_r:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::under_IO_getc:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::under_IO_putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::dunder_isoc99_scanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::stat64:
+ case LibFunc::lstat64:
+ case LibFunc::statvfs64:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::dunder_isoc99_sscanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fopen64:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fseeko64:
+ case LibFunc::ftello64:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::tmpfile64:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::fstat64:
+ case LibFunc::fstatvfs64:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::open64:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::gettimeofday:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // Currently some platforms have the restrict keyword on the arguments to
+ // gettimeofday. To be conservative, do not add noalias to gettimeofday's
+ // arguments.
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+
+ default:
+ // FIXME: It'd be really nice to cover all the library functions we're
+ // aware of here.
+ return false;
+ }
+}
+
+/// Apply prototype-based attribute inference to every function in \p M that
+/// is only a declaration.
+///
+/// \returns true if inferPrototypeAttributes reported a change for at least
+/// one declaration.
+static bool inferAllPrototypeAttributes(Module &M,
+                                        const TargetLibraryInfo &TLI) {
+  bool AnyChanged = false;
+
+  for (Function &Fn : M.functions()) {
+    // A definition can be analyzed directly, so the prototype is consulted
+    // only when no definition is around.
+    if (!Fn.isDeclaration())
+      continue;
+    if (inferPrototypeAttributes(Fn, TLI))
+      AnyChanged = true;
+  }
+
+  return AnyChanged;
+}
+
+/// Run prototype attribute inference over \p M, using the
+/// TargetLibraryAnalysis result obtained from \p AM.
+PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
+                                              AnalysisManager<Module> *AM) {
+  auto &LibInfo = AM->getResult<TargetLibraryAnalysis>(M);
+  const bool Inferred = inferAllPrototypeAttributes(M, LibInfo);
+
+  // When something was inferred, fundamental function attributes may have
+  // changed, so no analysis is reported as preserved. When nothing was
+  // inferred, every analysis is preserved.
+  return Inferred ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+namespace {
+/// Module pass wrapper that runs prototype attribute inference over a whole
+/// module.
+struct InferFunctionAttrsLegacyPass : public ModulePass {
+  static char ID; // Pass identification, replacement for typeid
+
+  InferFunctionAttrsLegacyPass() : ModulePass(ID) {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeInferFunctionAttrsLegacyPassPass(Registry);
+  }
+
+  /// The only analysis this pass requires is the target library information.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+  }
+
+  /// Forwards the result of inferAllPrototypeAttributes for \p M.
+  bool runOnModule(Module &M) override {
+    return inferAllPrototypeAttributes(
+        M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
+  }
+};
+} // end anonymous namespace
+
+// Definition of the static pass-identification member declared in
+// InferFunctionAttrsLegacyPass; it is the value handed to the ModulePass
+// constructor.
+char InferFunctionAttrsLegacyPass::ID = 0;
+// Registration of the legacy pass with the argument string "inferattrs" and
+// the description "Infer set function attributes". The BEGIN and END
+// invocations must carry identical arguments.
+// NOTE(review): the two trailing `false` arguments are presumably the
+// CFG-only and is-analysis flags -- confirm against the INITIALIZE_PASS
+// macro definitions.
+INITIALIZE_PASS_BEGIN(InferFunctionAttrsLegacyPass, "inferattrs",
+ "Infer set function attributes", false, false)
+// Declared dependency; mirrors the addRequired<TargetLibraryInfoWrapperPass>
+// call in getAnalysisUsage.
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(InferFunctionAttrsLegacyPass, "inferattrs",
+ "Infer set function attributes", false, false)
+
+/// Create a new InferFunctionAttrsLegacyPass instance. The object is
+/// allocated with `new` and handed back as a raw Pass pointer.
+Pass *llvm::createInferFunctionAttrsLegacyPass() {
+  Pass *NewPass = new InferFunctionAttrsLegacyPass();
+  return NewPass;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
index dc56a02..1704bfe 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -14,10 +14,10 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -35,17 +35,15 @@ namespace {
/// \brief Inliner pass which only handles "always inline" functions.
class AlwaysInliner : public Inliner {
- InlineCostAnalysis *ICA;
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true),
- ICA(nullptr) {
+ AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
AlwaysInliner(bool InsertLifetime)
- : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) {
+ : Inliner(ID, -2000000000, InsertLifetime) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -53,9 +51,6 @@ public:
InlineCost getInlineCost(CallSite CS) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnSCC(CallGraphSCC &SCC) override;
-
using llvm::Pass::doFinalization;
bool doFinalization(CallGraph &CG) override {
return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
@@ -67,10 +62,9 @@ public:
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
@@ -99,19 +93,8 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
// that are viable for inlining. FIXME: We shouldn't even get here for
// declarations.
if (Callee && !Callee->isDeclaration() &&
- CS.hasFnAttr(Attribute::AlwaysInline) &&
- ICA->isInlineViable(*Callee))
+ CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
return InlineCost::getAlways();
return InlineCost::getNever();
}
-
-bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) {
- ICA = &getAnalysis<InlineCostAnalysis>();
- return Inliner::runOnSCC(SCC);
-}
-
-void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<InlineCostAnalysis>();
- Inliner::getAnalysisUsage(AU);
-}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 9b01d81..45609f8 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -23,6 +23,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
using namespace llvm;
@@ -37,26 +38,30 @@ namespace {
/// inliner pass and the always inliner pass. The two passes use different cost
/// analyses to determine when to inline.
class SimpleInliner : public Inliner {
- InlineCostAnalysis *ICA;
public:
- SimpleInliner() : Inliner(ID), ICA(nullptr) {
+ SimpleInliner() : Inliner(ID) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
SimpleInliner(int Threshold)
- : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) {
+ : Inliner(ID, Threshold, /*InsertLifetime*/ true) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) override {
- return ICA->getInlineCost(CS, getInlineThreshold(CS));
+ Function *Callee = CS.getCalledFunction();
+ TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
+ return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT);
}
bool runOnSCC(CallGraphSCC &SCC) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ TargetTransformInfoWrapperPass *TTIWP;
};
static int computeThresholdFromOptLevels(unsigned OptLevel,
@@ -75,10 +80,10 @@ static int computeThresholdFromOptLevels(unsigned OptLevel,
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
@@ -95,11 +100,11 @@ Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
}
bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
- ICA = &getAnalysis<InlineCostAnalysis>();
+ TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
return Inliner::runOnSCC(SCC);
}
void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<InlineCostAnalysis>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
Inliner::getAnalysisUsage(AU);
}
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 5273c3d..bbe5f876 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -64,20 +65,22 @@ ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
// Threshold to use when optsize is specified (and there is no -inline-limit).
const int OptSizeThreshold = 75;
-Inliner::Inliner(char &ID)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {}
+Inliner::Inliner(char &ID)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
+}
Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
- InlineLimit : Threshold),
- InsertLifetime(InsertLifetime) {}
+ : CallGraphSCCPass(ID),
+ InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit
+ : Threshold),
+ InsertLifetime(InsertLifetime) {}
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -85,39 +88,6 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
InlinedArrayAllocasTy;
-/// \brief If the inlined function had a higher stack protection level than the
-/// calling function, then bump up the caller's stack protection level.
-static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
- // If upgrading the SSP attribute, clear out the old SSP Attributes first.
- // Having multiple SSP attributes doesn't actually hurt, but it adds useless
- // clutter to the IR.
- AttrBuilder B;
- B.addAttribute(Attribute::StackProtect)
- .addAttribute(Attribute::StackProtectStrong)
- .addAttribute(Attribute::StackProtectReq);
- AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
- AttributeSet::FunctionIndex,
- B);
-
- if (Callee->hasFnAttribute(Attribute::SafeStack)) {
- Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
- Caller->addFnAttr(Attribute::SafeStack);
- } else if (Callee->hasFnAttribute(Attribute::StackProtectReq) &&
- !Caller->hasFnAttribute(Attribute::SafeStack)) {
- Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
- Caller->addFnAttr(Attribute::StackProtectReq);
- } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) &&
- !Caller->hasFnAttribute(Attribute::SafeStack) &&
- !Caller->hasFnAttribute(Attribute::StackProtectReq)) {
- Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
- Caller->addFnAttr(Attribute::StackProtectStrong);
- } else if (Callee->hasFnAttribute(Attribute::StackProtect) &&
- !Caller->hasFnAttribute(Attribute::SafeStack) &&
- !Caller->hasFnAttribute(Attribute::StackProtectReq) &&
- !Caller->hasFnAttribute(Attribute::StackProtectStrong))
- Caller->addFnAttr(Attribute::StackProtect);
-}
-
/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
@@ -126,18 +96,26 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
-static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
+static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
int InlineHistory, bool InsertLifetime) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
+ // We need to manually construct BasicAA directly in order to disable
+ // its use of other function analyses.
+ BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee));
+
+ // Construct our own AA results for this function. We do this manually to
+ // work around the limitations of the legacy pass manager.
+ AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR));
+
// Try to inline the function. Get the list of static allocas that were
// inlined.
- if (!InlineFunction(CS, IFI, InsertLifetime))
+ if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
return false;
- AdjustCallerSSPLevel(Caller, Callee);
+ AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);
// Look at all of the allocas that we inlined through this call site. If we
// have already inlined other allocas through other calls into this function,
@@ -219,6 +197,14 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
<< *AvailableAlloca << '\n');
+ // Move affected dbg.declare calls immediately after the new alloca to
+ // avoid the situation when a dbg.declare precedes its alloca.
+ if (auto *L = LocalAsMetadata::getIfExists(AI))
+ if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
+ for (User *U : MDV->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
+ DDI->moveBefore(AvailableAlloca->getNextNode());
+
AI->replaceAllUsesWith(AvailableAlloca);
if (Align1 != Align2) {
@@ -258,39 +244,64 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
}
unsigned Inliner::getInlineThreshold(CallSite CS) const {
- int thres = InlineThreshold; // -inline-threshold or else selected by
- // overall opt level
+ int Threshold = InlineThreshold; // -inline-threshold or else selected by
+ // overall opt level
// If -inline-threshold is not given, listen to the optsize attribute when it
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
+ // FIXME: Use Function::optForSize().
Caller->hasFnAttribute(Attribute::OptimizeForSize);
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
- OptSizeThreshold < thres)
- thres = OptSizeThreshold;
+ OptSizeThreshold < Threshold)
+ Threshold = OptSizeThreshold;
- // Listen to the inlinehint attribute when it would increase the threshold
- // and the caller does not need to minimize its size.
Function *Callee = CS.getCalledFunction();
- bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttribute(Attribute::InlineHint);
- if (InlineHint && HintThreshold > thres &&
- !Caller->hasFnAttribute(Attribute::MinSize))
- thres = HintThreshold;
+ if (!Callee || Callee->isDeclaration())
+ return Threshold;
+
+ // If profile information is available, use that to adjust threshold of hot
+ // and cold functions.
+ // FIXME: The heuristic used below for determining hotness and coldness is
+ // based on preliminary SPEC tuning and may not be optimal. Replace this with
+ // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
+ uint64_t FunctionCount = 0, MaxFunctionCount = 0;
+ bool HasPGOCounts = false;
+ if (Callee->getEntryCount() &&
+ Callee->getParent()->getMaximumFunctionCount()) {
+ HasPGOCounts = true;
+ FunctionCount = Callee->getEntryCount().getValue();
+ MaxFunctionCount =
+ Callee->getParent()->getMaximumFunctionCount().getValue();
+ }
- // Listen to the cold attribute when it would decrease the threshold.
- bool ColdCallee = Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttribute(Attribute::Cold);
+ // Listen to the inlinehint attribute or profile based hotness information
+ // when it would increase the threshold and the caller does not need to
+ // minimize its size.
+ bool InlineHint =
+ Callee->hasFnAttribute(Attribute::InlineHint) ||
+ (HasPGOCounts &&
+ FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
+ if (InlineHint && HintThreshold > Threshold &&
+ !Caller->hasFnAttribute(Attribute::MinSize))
+ Threshold = HintThreshold;
+
+ // Listen to the cold attribute or profile based coldness information
+ // when it would decrease the threshold.
+ bool ColdCallee =
+ Callee->hasFnAttribute(Attribute::Cold) ||
+ (HasPGOCounts &&
+ FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
// Command line argument for InlineLimit will override the default
// ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
// do not use the default cold threshold even if it is smaller.
if ((InlineLimit.getNumOccurrences() == 0 ||
ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
- ColdThreshold < thres)
- thres = ColdThreshold;
+ ColdThreshold < Threshold)
+ Threshold = ColdThreshold;
- return thres;
+ return Threshold;
}
static void emitAnalysis(CallSite CS, const Twine &Msg) {
@@ -430,10 +441,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ ACT = &getAnalysis<AssumptionCacheTracker>();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SmallPtrSet<Function*, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
@@ -469,8 +478,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// If this is a direct call to an external function, we can never inline
// it. If it is an indirect call, inlining may resolve it to be a
// direct call, so we keep it.
- if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
- continue;
+ if (Function *Callee = CS.getCalledFunction())
+ if (Callee->isDeclaration())
+ continue;
CallSites.push_back(std::make_pair(CS, -1));
}
@@ -492,7 +502,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, AA, ACT);
+ InlineFunctionInfo InlineInfo(&CG, ACT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -513,7 +523,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// just delete the call instead of trying to inline it, regardless of
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
- if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) {
+ if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
DEBUG(dbgs() << " -> Deleting dead call: "
<< *CS.getInstruction() << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
@@ -550,7 +560,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
}
// Attempt to inline the function.
- if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+ if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime)) {
emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
Twine(Callee->getName() +
@@ -647,8 +657,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove set.
- for (auto I : CG) {
- CallGraphNode *CGN = I.second;
+ for (const auto &I : CG) {
+ CallGraphNode *CGN = I.second.get();
Function *F = CGN->getFunction();
if (!F || F->isDeclaration())
continue;
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index 7950163..21bb5d0 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -60,6 +60,10 @@ namespace {
explicit InternalizePass();
explicit InternalizePass(ArrayRef<const char *> ExportList);
void LoadFile(const char *Filename);
+ bool maybeInternalize(GlobalValue &GV,
+ const std::set<const Comdat *> &ExternalComdats);
+ void checkComdatVisibility(GlobalValue &GV,
+ std::set<const Comdat *> &ExternalComdats);
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -105,40 +109,85 @@ void InternalizePass::LoadFile(const char *Filename) {
}
}
-static bool shouldInternalize(const GlobalValue &GV,
- const std::set<std::string> &ExternalNames) {
+static bool isExternallyVisible(const GlobalValue &GV,
+ const std::set<std::string> &ExternalNames) {
// Function must be defined here
if (GV.isDeclaration())
- return false;
+ return true;
// Available externally is really just a "declaration with a body".
if (GV.hasAvailableExternallyLinkage())
- return false;
+ return true;
// Assume that dllexported symbols are referenced elsewhere
if (GV.hasDLLExportStorageClass())
- return false;
-
- // Already has internal linkage
- if (GV.hasLocalLinkage())
- return false;
+ return true;
// Marked to keep external?
- if (ExternalNames.count(GV.getName()))
- return false;
+ if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName()))
+ return true;
+
+ return false;
+}
+// Internalize GV if it is possible to do so, i.e. it is not externally visible
+// and is not a member of an externally visible comdat.
+bool InternalizePass::maybeInternalize(
+ GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
+ if (Comdat *C = GV.getComdat()) {
+ if (ExternalComdats.count(C))
+ return false;
+
+ // If a comdat is not externally visible we can drop it.
+ if (auto GO = dyn_cast<GlobalObject>(&GV))
+ GO->setComdat(nullptr);
+
+ if (GV.hasLocalLinkage())
+ return false;
+ } else {
+ if (GV.hasLocalLinkage())
+ return false;
+
+ if (isExternallyVisible(GV, ExternalNames))
+ return false;
+ }
+
+ GV.setVisibility(GlobalValue::DefaultVisibility);
+ GV.setLinkage(GlobalValue::InternalLinkage);
return true;
}
+// If GV is part of a comdat and is externally visible, keep track of its
+// comdat so that we don't internalize any of its members.
+void InternalizePass::checkComdatVisibility(
+ GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
+ Comdat *C = GV.getComdat();
+ if (!C)
+ return;
+
+ if (isExternallyVisible(GV, ExternalNames))
+ ExternalComdats.insert(C);
+}
+
bool InternalizePass::runOnModule(Module &M) {
CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
- bool Changed = false;
SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(M, Used, false);
+ // Collect comdat visibility information for the module.
+ std::set<const Comdat *> ExternalComdats;
+ if (!M.getComdatSymbolTable().empty()) {
+ for (Function &F : M)
+ checkComdatVisibility(F, ExternalComdats);
+ for (GlobalVariable &GV : M.globals())
+ checkComdatVisibility(GV, ExternalComdats);
+ for (GlobalAlias &GA : M.aliases())
+ checkComdatVisibility(GA, ExternalComdats);
+ }
+
// We must assume that globals in llvm.used have a reference that not even
// the linker can see, so we don't internalize them.
// For llvm.compiler.used the situation is a bit fuzzy. The assembler and
@@ -153,20 +202,16 @@ bool InternalizePass::runOnModule(Module &M) {
}
// Mark all functions not in the api as internal.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!shouldInternalize(*I, ExternalNames))
+ for (Function &I : M) {
+ if (!maybeInternalize(I, ExternalComdats))
continue;
- I->setVisibility(GlobalValue::DefaultVisibility);
- I->setLinkage(GlobalValue::InternalLinkage);
-
if (ExternalNode)
// Remove a callgraph edge from the external node to this function.
- ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+ ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
- Changed = true;
++NumFunctions;
- DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
+ DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
}
// Never internalize the llvm.used symbol. It is used to implement
@@ -191,12 +236,9 @@ bool InternalizePass::runOnModule(Module &M) {
// internal as well.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
- if (!shouldInternalize(*I, ExternalNames))
+ if (!maybeInternalize(*I, ExternalComdats))
continue;
- I->setVisibility(GlobalValue::DefaultVisibility);
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
++NumGlobals;
DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
}
@@ -204,17 +246,20 @@ bool InternalizePass::runOnModule(Module &M) {
// Mark all aliases that are not in the api as internal as well.
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I) {
- if (!shouldInternalize(*I, ExternalNames))
+ if (!maybeInternalize(*I, ExternalComdats))
continue;
- I->setVisibility(GlobalValue::DefaultVisibility);
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
++NumAliases;
DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
}
- return Changed;
+ // We do not keep track of whether this pass changed the module because
+ // it adds unnecessary complexity:
+ // 1) This pass will generally be near the start of the pass pipeline, so
+ // there will be no analyses to invalidate.
+ // 2) This pass will most likely end up changing the module and it isn't worth
+ // worrying about optimizing the case where the module is unchanged.
+ return true;
}
ModulePass *llvm::createInternalizePass() { return new InternalizePass(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index 41334ca..8e4ad64 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -43,12 +43,13 @@ namespace {
initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(BreakCriticalEdgesID);
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
}
};
}
@@ -79,7 +80,7 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
//
Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
-bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) {
if (skipOptnoneFunction(L))
return false;
@@ -92,6 +93,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
bool Changed = false;
// If there is more than one top-level loop in this function, extract all of
@@ -120,14 +122,14 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
}
if (ShouldExtractLoop) {
- // We must omit landing pads. Landing pads must accompany the invoke
+ // We must omit EH pads. EH pads must accompany the invoke
// instruction. But this would result in a loop in the extracted
// function. An infinite cycle occurs when it tries to extract that loop as
// well.
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getExitBlocks(ExitBlocks);
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i]->isLandingPad()) {
+ if (ExitBlocks[i]->isEHPad()) {
ShouldExtractLoop = false;
break;
}
@@ -141,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
- LPM.deleteLoopFromQueue(L);
+ LI.updateUnloop(L);
}
++NumExtracted;
}
@@ -259,7 +261,7 @@ bool BlockExtractorPass::runOnModule(Module &M) {
// Figure out which index the basic block is in its function.
Function::iterator BBI = MF->begin();
std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
- TranslatedBlocksToNotExtract.insert(BBI);
+ TranslatedBlocksToNotExtract.insert(&*BBI);
}
while (!BlocksToNotExtractByName.empty()) {
@@ -278,7 +280,7 @@ bool BlockExtractorPass::runOnModule(Module &M) {
BasicBlock &BB = *BI;
if (BB.getName() != BlockName) continue;
- TranslatedBlocksToNotExtract.insert(BI);
+ TranslatedBlocksToNotExtract.insert(&*BI);
}
}
@@ -291,8 +293,8 @@ bool BlockExtractorPass::runOnModule(Module &M) {
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
SplitLandingPadPreds(&*F);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (!TranslatedBlocksToNotExtract.count(BB))
- BlocksToExtract.push_back(BB);
+ if (!TranslatedBlocksToNotExtract.count(&*BB))
+ BlocksToExtract.push_back(&*BB);
}
for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
index c6795c6..7b51574 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
@@ -19,6 +19,8 @@
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -26,6 +28,8 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -59,9 +63,9 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
bool BitSetInfo::containsValue(
const DataLayout &DL,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
uint64_t COffset) const {
- if (auto GV = dyn_cast<GlobalVariable>(V)) {
+ if (auto GV = dyn_cast<GlobalObject>(V)) {
auto I = GlobalLayout.find(GV);
if (I == GlobalLayout.end())
return false;
@@ -90,6 +94,21 @@ bool BitSetInfo::containsValue(
return false;
}
+void BitSetInfo::print(raw_ostream &OS) const {
+ OS << "offset " << ByteOffset << " size " << BitSize << " align "
+ << (1 << AlignLog2);
+
+ if (isAllOnes()) {
+ OS << " all-ones\n";
+ return;
+ }
+
+ OS << " { ";
+ for (uint64_t B : Bits)
+ OS << B << ' ';
+ OS << "}\n";
+}
+
BitSetInfo BitSetBuilder::build() {
if (Min > Max)
Min = 0;
@@ -193,34 +212,48 @@ struct LowerBitSets : public ModulePass {
Module *M;
bool LinkerSubsectionsViaSymbols;
+ Triple::ArchType Arch;
+ Triple::ObjectFormatType ObjectFormat;
IntegerType *Int1Ty;
IntegerType *Int8Ty;
IntegerType *Int32Ty;
Type *Int32PtrTy;
IntegerType *Int64Ty;
- Type *IntPtrTy;
+ IntegerType *IntPtrTy;
// The llvm.bitsets named metadata.
NamedMDNode *BitSetNM;
- // Mapping from bitset mdstrings to the call sites that test them.
- DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+ // Mapping from bitset identifiers to the call sites that test them.
+ DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites;
std::vector<ByteArrayInfo> ByteArrayInfos;
BitSetInfo
- buildBitSet(MDString *BitSet,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ buildBitSet(Metadata *BitSet,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
ByteArrayInfo *createByteArray(BitSetInfo &BSI);
void allocateByteArrays();
Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
Value *BitOffset);
+ void lowerBitSetCalls(ArrayRef<Metadata *> BitSets,
+ Constant *CombinedGlobalAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
Value *
lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- GlobalVariable *CombinedGlobal,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
- void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets,
- const std::vector<GlobalVariable *> &Globals);
+ Constant *CombinedGlobal,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets,
+ ArrayRef<GlobalVariable *> Globals);
+ unsigned getJumpTableEntrySize();
+ Type *getJumpTableEntryType();
+ Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest,
+ unsigned Distance);
+ void verifyBitSetMDNode(MDNode *Op);
+ void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+ ArrayRef<Function *> Functions);
+ void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets,
+ ArrayRef<GlobalObject *> Globals);
bool buildBitSets();
bool eraseBitSetMetadata();
@@ -228,7 +261,7 @@ struct LowerBitSets : public ModulePass {
bool runOnModule(Module &M) override;
};
-} // namespace
+} // anonymous namespace
INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
"Lower bitset metadata", false, false)
@@ -244,6 +277,8 @@ bool LowerBitSets::doInitialization(Module &Mod) {
Triple TargetTriple(M->getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+ Arch = TargetTriple.getArch();
+ ObjectFormat = TargetTriple.getObjectFormat();
Int1Ty = Type::getInt1Ty(M->getContext());
Int8Ty = Type::getInt8Ty(M->getContext());
@@ -262,8 +297,8 @@ bool LowerBitSets::doInitialization(Module &Mod) {
/// Build a bit set for BitSet using the object layouts in
/// GlobalLayout.
BitSetInfo LowerBitSets::buildBitSet(
- MDString *BitSet,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ Metadata *BitSet,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
BitSetBuilder BSB;
// Compute the byte offset of each element of this bitset.
@@ -271,8 +306,11 @@ BitSetInfo LowerBitSets::buildBitSet(
for (MDNode *Op : BitSetNM->operands()) {
if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
continue;
- auto OpGlobal = dyn_cast<GlobalVariable>(
- cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ Constant *OpConst =
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue();
+ if (auto GA = dyn_cast<GlobalAlias>(OpConst))
+ OpConst = GA->getAliasee();
+ auto OpGlobal = dyn_cast<GlobalObject>(OpConst);
if (!OpGlobal)
continue;
uint64_t Offset =
@@ -360,9 +398,8 @@ void LowerBitSets::allocateByteArrays() {
if (LinkerSubsectionsViaSymbols) {
BAI->ByteArray->replaceAllUsesWith(GEP);
} else {
- GlobalAlias *Alias =
- GlobalAlias::create(PointerType::getUnqual(Int8Ty),
- GlobalValue::PrivateLinkage, "bits", GEP, M);
+ GlobalAlias *Alias = GlobalAlias::create(
+ Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M);
BAI->ByteArray->replaceAllUsesWith(Alias);
}
BAI->ByteArray->eraseFromParent();
@@ -404,7 +441,7 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
// Each use of the byte array uses a different alias. This makes the
// backend less likely to reuse previously computed byte array addresses,
// improving the security of the CFI mechanism based on this pass.
- ByteArray = GlobalAlias::create(BAI->ByteArray->getType(),
+ ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
GlobalValue::PrivateLinkage, "bits_use",
ByteArray, M);
}
@@ -421,17 +458,16 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
/// replace the call with.
Value *LowerBitSets::lowerBitSetCall(
CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- GlobalVariable *CombinedGlobal,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ Constant *CombinedGlobalIntAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
Value *Ptr = CI->getArgOperand(0);
const DataLayout &DL = M->getDataLayout();
if (BSI.containsValue(DL, GlobalLayout, Ptr))
- return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext());
+ return ConstantInt::getTrue(M->getContext());
- Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
- GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+ CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
BasicBlock *InitialBB = CI->getParent();
@@ -490,18 +526,19 @@ Value *LowerBitSets::lowerBitSetCall(
/// Given a disjoint set of bitsets and globals, layout the globals, build the
/// bit sets and lower the llvm.bitset.test calls.
-void LowerBitSets::buildBitSetsFromGlobals(
- const std::vector<MDString *> &BitSets,
- const std::vector<GlobalVariable *> &Globals) {
+void LowerBitSets::buildBitSetsFromGlobalVariables(
+ ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) {
// Build a new global with the combined contents of the referenced globals.
+ // This global is a struct whose even-indexed elements contain the original
+ // contents of the referenced globals and whose odd-indexed elements contain
+ // any padding required to align the next element to the next power of 2.
std::vector<Constant *> GlobalInits;
const DataLayout &DL = M->getDataLayout();
for (GlobalVariable *G : Globals) {
GlobalInits.push_back(G->getInitializer());
- uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType());
+ uint64_t InitSize = DL.getTypeAllocSize(G->getValueType());
- // Compute the amount of padding required to align the next element to the
- // next power of 2.
+ // Compute the amount of padding required.
uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
// Cap at 128 was found experimentally to have a good data/instruction
@@ -515,34 +552,20 @@ void LowerBitSets::buildBitSetsFromGlobals(
if (!GlobalInits.empty())
GlobalInits.pop_back();
Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
- auto CombinedGlobal =
+ auto *CombinedGlobal =
new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
GlobalValue::PrivateLinkage, NewInit);
- const StructLayout *CombinedGlobalLayout =
- DL.getStructLayout(cast<StructType>(NewInit->getType()));
+ StructType *NewTy = cast<StructType>(NewInit->getType());
+ const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);
// Compute the offsets of the original globals within the new global.
- DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+ DenseMap<GlobalObject *, uint64_t> GlobalLayout;
for (unsigned I = 0; I != Globals.size(); ++I)
// Multiply by 2 to account for padding elements.
GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
- // For each bitset in this disjoint set...
- for (MDString *BS : BitSets) {
- // Build the bitset.
- BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
-
- ByteArrayInfo *BAI = 0;
-
- // Lower each call to llvm.bitset.test for this bitset.
- for (CallInst *CI : BitSetTestCallSites[BS]) {
- ++NumBitSetCallsLowered;
- Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout);
- CI->replaceAllUsesWith(Lowered);
- CI->eraseFromParent();
- }
- }
+ lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout);
// Build aliases pointing to offsets into the combined global for each
// global from which we built the combined global, and replace references
@@ -556,9 +579,11 @@ void LowerBitSets::buildBitSetsFromGlobals(
if (LinkerSubsectionsViaSymbols) {
Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
} else {
- GlobalAlias *GAlias =
- GlobalAlias::create(Globals[I]->getType(), Globals[I]->getLinkage(),
- "", CombinedGlobalElemPtr, M);
+ assert(Globals[I]->getType()->getAddressSpace() == 0);
+ GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0,
+ Globals[I]->getLinkage(), "",
+ CombinedGlobalElemPtr, M);
+ GAlias->setVisibility(Globals[I]->getVisibility());
GAlias->takeName(Globals[I]);
Globals[I]->replaceAllUsesWith(GAlias);
}
@@ -566,6 +591,331 @@ void LowerBitSets::buildBitSetsFromGlobals(
}
}
+void LowerBitSets::lowerBitSetCalls(
+ ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+ Constant *CombinedGlobalIntAddr =
+ ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
+
+ // For each bitset in this disjoint set...
+ for (Metadata *BS : BitSets) {
+ // Build the bitset.
+ BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+ DEBUG({
+ if (auto BSS = dyn_cast<MDString>(BS))
+ dbgs() << BSS->getString() << ": ";
+ else
+ dbgs() << "<unnamed>: ";
+ BSI.print(dbgs());
+ });
+
+ ByteArrayInfo *BAI = nullptr;
+
+ // Lower each call to llvm.bitset.test for this bitset.
+ for (CallInst *CI : BitSetTestCallSites[BS]) {
+ ++NumBitSetCallsLowered;
+ Value *Lowered =
+ lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
+ }
+}
+
+void LowerBitSets::verifyBitSetMDNode(MDNode *Op) {
+ if (Op->getNumOperands() != 3)
+ report_fatal_error(
+ "All operands of llvm.bitsets metadata must have 3 elements");
+ if (!Op->getOperand(1))
+ return;
+
+ auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+ if (!OpConstMD)
+ report_fatal_error("Bit set element must be a constant");
+ auto OpGlobal = dyn_cast<GlobalObject>(OpConstMD->getValue());
+ if (!OpGlobal)
+ return;
+
+ if (OpGlobal->isThreadLocal())
+ report_fatal_error("Bit set element may not be thread-local");
+ if (OpGlobal->hasSection())
+ report_fatal_error("Bit set element may not have an explicit section");
+
+ if (isa<GlobalVariable>(OpGlobal) && OpGlobal->isDeclarationForLinker())
+ report_fatal_error("Bit set global var element must be a definition");
+
+ auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+ if (!OffsetConstMD)
+ report_fatal_error("Bit set element offset must be a constant");
+ auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+ if (!OffsetInt)
+ report_fatal_error("Bit set element offset must be an integer constant");
+}
+
+static const unsigned kX86JumpTableEntrySize = 8;
+
+unsigned LowerBitSets::getJumpTableEntrySize() {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ return kX86JumpTableEntrySize;
+}
+
+// Create a constant representing a jump table entry for the target. This
+// consists of an instruction sequence containing a relative branch to Dest. The
+// constant will be laid out at address Src+(Len*Distance) where Len is the
+// target-specific jump table entry size.
+Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest,
+ unsigned Distance) {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ const unsigned kJmpPCRel32Code = 0xe9;
+ const unsigned kInt3Code = 0xcc;
+
+ ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code);
+
+ // Build a constant representing the displacement between the constant's
+ // address and Dest. This will resolve to a PC32 relocation referring to Dest.
+ Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy);
+ Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy);
+ Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt);
+ ConstantInt *DispOffset =
+ ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5);
+ Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset);
+ OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty);
+
+ ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code);
+
+ Constant *Fields[] = {
+ Jmp, OffsetedDisp, Int3, Int3, Int3,
+ };
+ return ConstantStruct::getAnon(Fields, /*Packed=*/true);
+}
+
+Type *LowerBitSets::getJumpTableEntryType() {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ return StructType::get(M->getContext(),
+ {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty},
+ /*Packed=*/true);
+}
+
+/// Given a disjoint set of bitsets and functions, build a jump table for the
+/// functions, build the bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+ ArrayRef<Function *> Functions) {
+ // Unlike the global bitset builder, the function bitset builder cannot
+ // re-arrange functions in a particular order and base its calculations on the
+ // layout of the functions' entry points, as we have no idea how large a
+ // particular function will end up being (the size could even depend on what
+ // this pass does!) Instead, we build a jump table, which is a block of code
+ // consisting of one branch instruction for each of the functions in the bit
+ // set that branches to the target function, and redirect any taken function
+ // addresses to the corresponding jump table entry. In the object file's
+ // symbol table, the symbols for the target functions also refer to the jump
+ // table entries, so that addresses taken outside the module will pass any
+ // verification done inside the module.
+ //
+ // In more concrete terms, suppose we have three functions f, g, h which are
+ // members of a single bitset, and a function foo that returns their
+ // addresses:
+ //
+ // f:
+ // mov 0, %eax
+ // ret
+ //
+ // g:
+ // mov 1, %eax
+ // ret
+ //
+ // h:
+ // mov 2, %eax
+ // ret
+ //
+ // foo:
+ // mov f, %eax
+ // mov g, %edx
+ // mov h, %ecx
+ // ret
+ //
+ // To create a jump table for these functions, we instruct the LLVM code
+ // generator to output a jump table in the .text section. This is done by
+ // representing the instructions in the jump table as an LLVM constant and
+ // placing them in a global variable in the .text section. The end result will
+ // (conceptually) look like this:
+ //
+ // f:
+ // jmp .Ltmp0 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // g:
+ // jmp .Ltmp1 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // h:
+ // jmp .Ltmp2 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // .Ltmp0:
+ // mov 0, %eax
+ // ret
+ //
+ // .Ltmp1:
+ // mov 1, %eax
+ // ret
+ //
+ // .Ltmp2:
+ // mov 2, %eax
+ // ret
+ //
+ // foo:
+ // mov f, %eax
+ // mov g, %edx
+ // mov h, %ecx
+ // ret
+ //
+ // Because the addresses of f, g, h are evenly spaced at a power of 2, in the
+ // normal case the check can be carried out using the same kind of simple
+ // arithmetic that we normally use for globals.
+
+ assert(!Functions.empty());
+
+ // Build a simple layout based on the regular layout of jump tables.
+ DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+ unsigned EntrySize = getJumpTableEntrySize();
+ for (unsigned I = 0; I != Functions.size(); ++I)
+ GlobalLayout[Functions[I]] = I * EntrySize;
+
+ // Create a constant to hold the jump table.
+ ArrayType *JumpTableType =
+ ArrayType::get(getJumpTableEntryType(), Functions.size());
+ auto JumpTable = new GlobalVariable(*M, JumpTableType,
+ /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, nullptr);
+ JumpTable->setSection(ObjectFormat == Triple::MachO
+ ? "__TEXT,__text,regular,pure_instructions"
+ : ".text");
+ lowerBitSetCalls(BitSets, JumpTable, GlobalLayout);
+
+ // Build aliases pointing to offsets into the jump table, and replace
+ // references to the original functions with references to the aliases.
+ for (unsigned I = 0; I != Functions.size(); ++I) {
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
+ ConstantExpr::getGetElementPtr(
+ JumpTableType, JumpTable,
+ ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, I)}),
+ Functions[I]->getType());
+ if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) {
+ Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ } else {
+ assert(Functions[I]->getType()->getAddressSpace() == 0);
+ GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0,
+ Functions[I]->getLinkage(), "",
+ CombinedGlobalElemPtr, M);
+ GAlias->setVisibility(Functions[I]->getVisibility());
+ GAlias->takeName(Functions[I]);
+ Functions[I]->replaceAllUsesWith(GAlias);
+ }
+ if (!Functions[I]->isDeclarationForLinker())
+ Functions[I]->setLinkage(GlobalValue::PrivateLinkage);
+ }
+
+ // Build and set the jump table's initializer.
+ std::vector<Constant *> JumpTableEntries;
+ for (unsigned I = 0; I != Functions.size(); ++I)
+ JumpTableEntries.push_back(
+ createJumpTableEntry(JumpTable, Functions[I], I));
+ JumpTable->setInitializer(
+ ConstantArray::get(JumpTableType, JumpTableEntries));
+}
+
+void LowerBitSets::buildBitSetsFromDisjointSet(
+ ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) {
+ llvm::DenseMap<Metadata *, uint64_t> BitSetIndices;
+ llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices;
+ for (unsigned I = 0; I != BitSets.size(); ++I)
+ BitSetIndices[BitSets[I]] = I;
+ for (unsigned I = 0; I != Globals.size(); ++I)
+ GlobalIndices[Globals[I]] = I;
+
+ // For each bitset, build a set of indices that refer to globals referenced by
+ // the bitset.
+ std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ // Op = { bitset name, global, offset }
+ if (!Op->getOperand(1))
+ continue;
+ auto I = BitSetIndices.find(Op->getOperand(0));
+ if (I == BitSetIndices.end())
+ continue;
+
+ auto OpGlobal = dyn_cast<GlobalObject>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ if (!OpGlobal)
+ continue;
+ BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+ }
+ }
+
+ // Order the sets of indices by size. The GlobalLayoutBuilder works best
+ // when given small index sets first.
+ std::stable_sort(
+ BitSetMembers.begin(), BitSetMembers.end(),
+ [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+ return O1.size() < O2.size();
+ });
+
+ // Create a GlobalLayoutBuilder and provide it with index sets as layout
+ // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
+ // close together as possible.
+ GlobalLayoutBuilder GLB(Globals.size());
+ for (auto &&MemSet : BitSetMembers)
+ GLB.addFragment(MemSet);
+
+ // Build the bitsets from this disjoint set.
+ if (Globals.empty() || isa<GlobalVariable>(Globals[0])) {
+ // Build a vector of global variables with the computed layout.
+ std::vector<GlobalVariable *> OrderedGVs(Globals.size());
+ auto OGI = OrderedGVs.begin();
+ for (auto &&F : GLB.Fragments) {
+ for (auto &&Offset : F) {
+ auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
+ if (!GV)
+ report_fatal_error(
+ "Bit set may not contain both global variables and functions");
+ *OGI++ = GV;
+ }
+ }
+
+ buildBitSetsFromGlobalVariables(BitSets, OrderedGVs);
+ } else {
+ // Build a vector of functions with the computed layout.
+ std::vector<Function *> OrderedFns(Globals.size());
+ auto OFI = OrderedFns.begin();
+ for (auto &&F : GLB.Fragments) {
+ for (auto &&Offset : F) {
+ auto Fn = dyn_cast<Function>(Globals[Offset]);
+ if (!Fn)
+ report_fatal_error(
+ "Bit set may not contain both global variables and functions");
+ *OFI++ = Fn;
+ }
+ }
+
+ buildBitSetsFromFunctions(BitSets, OrderedFns);
+ }
+}
+
/// Lower all bit sets in this module.
bool LowerBitSets::buildBitSets() {
Function *BitSetTestFunc =
@@ -576,24 +926,36 @@ bool LowerBitSets::buildBitSets() {
// Equivalence class set containing bitsets and the globals they reference.
// This is used to partition the set of bitsets in the module into disjoint
// sets.
- typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+ typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
GlobalClassesTy;
GlobalClassesTy GlobalClasses;
+ // Verify the bitset metadata and build a mapping from bitset identifiers to
+ // their last observed index in BitSetNM. This will be used later to
+ // deterministically order the list of bitset identifiers.
+ llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices;
+ if (BitSetNM) {
+ for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) {
+ MDNode *Op = BitSetNM->getOperand(I);
+ verifyBitSetMDNode(Op);
+ BitSetIdIndices[Op->getOperand(0)] = I;
+ }
+ }
+
for (const Use &U : BitSetTestFunc->uses()) {
auto CI = cast<CallInst>(U.getUser());
auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
- if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+ if (!BitSetMDVal)
report_fatal_error(
- "Second argument of llvm.bitset.test must be metadata string");
- auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+ "Second argument of llvm.bitset.test must be metadata");
+ auto BitSet = BitSetMDVal->getMetadata();
// Add the call site to the list of call sites for this bit set. We also use
// BitSetTestCallSites to keep track of whether we have seen this bit set
// before. If we have, we don't need to re-add the referenced globals to the
// equivalence class.
- std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+ std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator,
bool> Ins =
BitSetTestCallSites.insert(
std::make_pair(BitSet, std::vector<CallInst *>()));
@@ -608,31 +970,16 @@ bool LowerBitSets::buildBitSets() {
if (!BitSetNM)
continue;
- // Verify the bitset metadata and add the referenced globals to the bitset's
- // equivalence class.
+ // Add the referenced globals to the bitset's equivalence class.
for (MDNode *Op : BitSetNM->operands()) {
- if (Op->getNumOperands() != 3)
- report_fatal_error(
- "All operands of llvm.bitsets metadata must have 3 elements");
-
if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
continue;
- auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
- if (!OpConstMD)
- report_fatal_error("Bit set element must be a constant");
- auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
+ auto OpGlobal = dyn_cast<GlobalObject>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
if (!OpGlobal)
continue;
- auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
- if (!OffsetConstMD)
- report_fatal_error("Bit set element offset must be a constant");
- auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
- if (!OffsetInt)
- report_fatal_error(
- "Bit set element offset must be an integer constant");
-
CurSet = GlobalClasses.unionSets(
CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
}
@@ -641,79 +988,51 @@ bool LowerBitSets::buildBitSets() {
if (GlobalClasses.empty())
return false;
- // For each disjoint set we found...
+ // Build a list of disjoint sets ordered by their maximum BitSetNM index
+ // for determinism.
+ std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
E = GlobalClasses.end();
I != E; ++I) {
if (!I->isLeader()) continue;
-
++NumBitSetDisjointSets;
- // Build the list of bitsets and referenced globals in this disjoint set.
- std::vector<MDString *> BitSets;
- std::vector<GlobalVariable *> Globals;
- llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
- llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
+ unsigned MaxIndex = 0;
for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
MI != GlobalClasses.member_end(); ++MI) {
- if ((*MI).is<MDString *>()) {
- BitSetIndices[MI->get<MDString *>()] = BitSets.size();
- BitSets.push_back(MI->get<MDString *>());
- } else {
- GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
- Globals.push_back(MI->get<GlobalVariable *>());
- }
+ if ((*MI).is<Metadata *>())
+ MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get<Metadata *>()]);
}
+ Sets.emplace_back(I, MaxIndex);
+ }
+ std::sort(Sets.begin(), Sets.end(),
+ [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
+ const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
+ return S1.second < S2.second;
+ });
- // For each bitset, build a set of indices that refer to globals referenced
- // by the bitset.
- std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
- if (BitSetNM) {
- for (MDNode *Op : BitSetNM->operands()) {
- // Op = { bitset name, global, offset }
- if (!Op->getOperand(1))
- continue;
- auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
- if (I == BitSetIndices.end())
- continue;
-
- auto OpGlobal = dyn_cast<GlobalVariable>(
- cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
- if (!OpGlobal)
- continue;
- BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
- }
+ // For each disjoint set we found...
+ for (const auto &S : Sets) {
+ // Build the list of bitsets in this disjoint set.
+ std::vector<Metadata *> BitSets;
+ std::vector<GlobalObject *> Globals;
+ for (GlobalClassesTy::member_iterator MI =
+ GlobalClasses.member_begin(S.first);
+ MI != GlobalClasses.member_end(); ++MI) {
+ if ((*MI).is<Metadata *>())
+ BitSets.push_back(MI->get<Metadata *>());
+ else
+ Globals.push_back(MI->get<GlobalObject *>());
}
- // Order the sets of indices by size. The GlobalLayoutBuilder works best
- // when given small index sets first.
- std::stable_sort(
- BitSetMembers.begin(), BitSetMembers.end(),
- [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
- return O1.size() < O2.size();
- });
-
- // Create a GlobalLayoutBuilder and provide it with index sets as layout
- // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
- // as close together as possible.
- GlobalLayoutBuilder GLB(Globals.size());
- for (auto &&MemSet : BitSetMembers)
- GLB.addFragment(MemSet);
-
- // Build a vector of globals with the computed layout.
- std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
- auto OGI = OrderedGlobals.begin();
- for (auto &&F : GLB.Fragments)
- for (auto &&Offset : F)
- *OGI++ = Globals[Offset];
-
- // Order bitsets by name for determinism.
- std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
- return S1->getString() < S2->getString();
+ // Order bitsets by BitSetNM index for determinism. This ordering is stable
+ // as there is a one-to-one mapping between metadata and indices.
+ std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) {
+ return BitSetIdIndices[M1] < BitSetIdIndices[M2];
});
- // Build the bitsets from this disjoint set.
- buildBitSetsFromGlobals(BitSets, OrderedGlobals);
+ // Lower the bitsets in this disjoint set.
+ buildBitSetsFromDisjointSet(BitSets, Globals);
}
allocateByteArrays();
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 2e3519e..8a209a1 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -27,6 +27,14 @@
// -- We define Function* container class with custom "operator<" (FunctionPtr).
// -- "FunctionPtr" instances are stored in std::set collection, so every
// std::set::insert operation will give you result in log(N) time.
+//
+// As an optimization, a hash of the function structure is calculated first, and
+// two functions are only compared if they have the same hash. This hash is
+// cheap to compute, and has the property that if function F == G according to
+// the comparison function, then hash(F) == hash(G). This consistency property
+// is critical to ensuring all possible merging opportunities are exploited.
+// Collisions in the hash affect the speed of the pass but not the correctness
+// or determinism of the resulting transformation.
//
// When a match is found the functions are folded. If both functions are
// overridable, we move the functionality into a new internal function and
@@ -87,6 +95,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -97,12 +106,14 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "mergefunc"
@@ -121,21 +132,64 @@ static cl::opt<unsigned> NumFunctionsForSanityCheck(
namespace {
+/// GlobalNumberState assigns an integer to each global value in the program,
+/// which is used by the comparison routine to order references to globals. This
+/// state must be preserved throughout the pass, because Functions and other
+/// globals need to maintain their relative order. Globals are assigned a number
+/// when they are first visited. This order is deterministic, and so the
+/// assigned numbers are as well. When two functions are merged, neither number
+/// is updated. If the symbols are weak, this would be incorrect. If they are
+/// strong, then one will be replaced at all references to the other, and so
+/// direct callsites will now see one or the other symbol, and no update is
+/// necessary. Note that if we were guaranteed unique names, we could just
+/// compare those, but this would not work for stripped bitcodes or for those
+/// few symbols without a name.
+class GlobalNumberState {
+ struct Config : ValueMapConfig<GlobalValue*> {
+ enum { FollowRAUW = false };
+ };
+ // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
+ // occurs, the mapping does not change. Tracking changes is unnecessary, and
+ // also problematic for weak symbols (which may be overwritten).
+ typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
+ ValueNumberMap GlobalNumbers;
+ // The next unused serial number to assign to a global.
+ uint64_t NextNumber;
+ public:
+ GlobalNumberState() : GlobalNumbers(), NextNumber(0) {}
+ uint64_t getNumber(GlobalValue* Global) {
+ ValueNumberMap::iterator MapIter;
+ bool Inserted;
+ std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
+ if (Inserted)
+ NextNumber++;
+ return MapIter->second;
+ }
+ void clear() {
+ GlobalNumbers.clear();
+ }
+};
+
/// FunctionComparator - Compares two functions to determine whether or not
/// they will generate machine code with the same behaviour. DataLayout is
/// used if available. The comparator always fails conservatively (erring on the
/// side of claiming that two functions are different).
class FunctionComparator {
public:
- FunctionComparator(const Function *F1, const Function *F2)
- : FnL(F1), FnR(F2) {}
+ FunctionComparator(const Function *F1, const Function *F2,
+ GlobalNumberState* GN)
+ : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
/// Test whether the two functions have equivalent behaviour.
int compare();
+ /// Hash a function. Equivalent functions will have the same hash, and unequal
+ /// functions will have different hashes with high probability.
+ typedef uint64_t FunctionHash;
+ static FunctionHash functionHash(Function &);
private:
/// Test whether two basic blocks have equivalent behaviour.
- int compare(const BasicBlock *BBL, const BasicBlock *BBR);
+ int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR);
/// Constants comparison.
/// Its analog to lexicographical comparison between hypothetical numbers
@@ -241,6 +295,10 @@ private:
/// If these properties are equal - compare their contents.
int cmpConstants(const Constant *L, const Constant *R);
+ /// Compares two global values by number. Uses the GlobalNumberState to
+ /// identify the same globals across function calls.
+ int cmpGlobalValues(GlobalValue *L, GlobalValue *R);
+
/// Assign or look up previously assigned numbers for the two values, and
/// return whether the numbers are equal. Numbers are assigned in the order
/// visited.
@@ -320,8 +378,9 @@ private:
///
/// 1. If types are of different kind (different type IDs).
/// Return result of type IDs comparison, treating them as numbers.
- /// 2. If types are vectors or integers, compare Type* values as numbers.
- /// 3. Types has same ID, so check whether they belongs to the next group:
+ /// 2. If types are integers, check that they have the same width. If they
+ /// are vectors, check that they have the same count and subtype.
+ /// 3. Types have the same ID, so check whether they are one of:
/// * Void
/// * Float
/// * Double
@@ -330,8 +389,7 @@ private:
/// * PPC_FP128
/// * Label
/// * Metadata
- /// If so - return 0, yes - we can treat these types as equal only because
- /// their IDs are same.
+ /// We can treat these types as equal whenever their IDs are the same.
/// 4. If Left and Right are pointers, return result of address space
/// comparison (numbers comparison). We can treat pointer types of same
/// address space as equal.
@@ -343,11 +401,13 @@ private:
int cmpTypes(Type *TyL, Type *TyR) const;
int cmpNumbers(uint64_t L, uint64_t R) const;
-
int cmpAPInts(const APInt &L, const APInt &R) const;
int cmpAPFloats(const APFloat &L, const APFloat &R) const;
- int cmpStrings(StringRef L, StringRef R) const;
+ int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
+ int cmpMem(StringRef L, StringRef R) const;
int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+ int cmpRangeMetadata(const MDNode* L, const MDNode* R) const;
+ int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const;
// The two functions undergoing comparison.
const Function *FnL, *FnR;
@@ -386,30 +446,30 @@ private:
/// could be operands from further BBs we didn't scan yet.
/// So it's impossible to use dominance properties in general.
DenseMap<const Value*, int> sn_mapL, sn_mapR;
+
+ // The global state we will use
+ GlobalNumberState* GlobalNumbers;
};
class FunctionNode {
mutable AssertingVH<Function> F;
-
+ FunctionComparator::FunctionHash Hash;
public:
- FunctionNode(Function *F) : F(F) {}
+ // Note the hash is recalculated potentially multiple times, but it is cheap.
+ FunctionNode(Function *F)
+ : F(F), Hash(FunctionComparator::functionHash(*F)) {}
Function *getFunc() const { return F; }
+ FunctionComparator::FunctionHash getHash() const { return Hash; }
/// Replace the reference to the function F by the function G, assuming their
/// implementations are equal.
void replaceBy(Function *G) const {
- assert(!(*this < FunctionNode(G)) && !(FunctionNode(G) < *this) &&
- "The two functions must be equal");
-
F = G;
}
- void release() { F = 0; }
- bool operator<(const FunctionNode &RHS) const {
- return (FunctionComparator(F, RHS.getFunc()).compare()) == -1;
- }
+ void release() { F = nullptr; }
};
-}
+} // end anonymous namespace
int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
if (L < R) return -1;
@@ -426,13 +486,25 @@ int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
}
int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
- if (int Res = cmpNumbers((uint64_t)&L.getSemantics(),
- (uint64_t)&R.getSemantics()))
+ // Floats are ordered first by semantics (i.e. float, double, half, etc.),
+ // then by value interpreted as a bitstring (aka APInt).
+ const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
+ if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
+ APFloat::semanticsPrecision(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+ APFloat::semanticsMaxExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+ APFloat::semanticsMinExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+ APFloat::semanticsSizeInBits(SR)))
return Res;
return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
}
-int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
+int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
// Prevent heavy comparison, compare sizes first.
if (int Res = cmpNumbers(L.size(), R.size()))
return Res;
@@ -466,6 +538,59 @@ int FunctionComparator::cmpAttrs(const AttributeSet L,
return 0;
}
+int FunctionComparator::cmpRangeMetadata(const MDNode* L,
+ const MDNode* R) const {
+ if (L == R)
+ return 0;
+ if (!L)
+ return -1;
+ if (!R)
+ return 1;
+ // Range metadata is a sequence of numbers. Make sure they are the same
+ // sequence.
+ // TODO: Note that as this is metadata, it is possible to drop and/or merge
+ // this data when considering functions to merge. Thus this comparison would
+ // return 0 (i.e. equivalent), but merging would become more complicated
+ // because the ranges would need to be unioned. It is not likely that
+ // functions differ ONLY in this metadata if they are actually the same
+ // function semantically.
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+ for (size_t I = 0; I < L->getNumOperands(); ++I) {
+ ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+ ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+ if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+ return Res;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
+ const Instruction *R) const {
+ ImmutableCallSite LCS(L);
+ ImmutableCallSite RCS(R);
+
+ assert(LCS && RCS && "Must be calls or invokes!");
+ assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+
+ if (int Res =
+ cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+ return Res;
+
+ for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
+ auto OBL = LCS.getOperandBundleAt(i);
+ auto OBR = RCS.getOperandBundleAt(i);
+
+ if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+ return Res;
+
+ if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+ return Res;
+ }
+
+ return 0;
+}
+
/// Constants comparison:
/// 1. Check whether type of L constant could be losslessly bitcasted to R
/// type.
@@ -500,9 +625,9 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
unsigned TyLWidth = 0;
unsigned TyRWidth = 0;
- if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
+ if (auto *VecTyL = dyn_cast<VectorType>(TyL))
TyLWidth = VecTyL->getBitWidth();
- if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
+ if (auto *VecTyR = dyn_cast<VectorType>(TyR))
TyRWidth = VecTyR->getBitWidth();
if (TyLWidth != TyRWidth)
@@ -538,11 +663,29 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
if (!L->isNullValue() && R->isNullValue())
return -1;
+ auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
+ auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
+ if (GlobalValueL && GlobalValueR) {
+ return cmpGlobalValues(GlobalValueL, GlobalValueR);
+ }
+
if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
return Res;
+ if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+ const auto *SeqR = cast<ConstantDataSequential>(R);
+ // This handles ConstantDataArray and ConstantDataVector. Note that we
+ // compare the two raw data arrays, which might differ depending on the host
+ // endianness. This isn't a problem though, because the endianness of a module
+ // will affect the order of the constants, but this order is the same
+ // for a given input module and host platform.
+ return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+ }
+
switch (L->getValueID()) {
- case Value::UndefValueVal: return TypesRes;
+ case Value::UndefValueVal:
+ case Value::ConstantTokenNoneVal:
+ return TypesRes;
case Value::ConstantIntVal: {
const APInt &LInt = cast<ConstantInt>(L)->getValue();
const APInt &RInt = cast<ConstantInt>(R)->getValue();
@@ -609,19 +752,55 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
}
return 0;
}
- case Value::FunctionVal:
- case Value::GlobalVariableVal:
- case Value::GlobalAliasVal:
- default: // Unknown constant, cast L and R pointers to numbers and compare.
- return cmpNumbers((uint64_t)L, (uint64_t)R);
+ case Value::BlockAddressVal: {
+ const BlockAddress *LBA = cast<BlockAddress>(L);
+ const BlockAddress *RBA = cast<BlockAddress>(R);
+ if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+ return Res;
+ if (LBA->getFunction() == RBA->getFunction()) {
+ // They are BBs in the same function. Order by which comes first in the
+ // BB order of the function. This order is deterministic.
+ Function* F = LBA->getFunction();
+ BasicBlock *LBB = LBA->getBasicBlock();
+ BasicBlock *RBB = RBA->getBasicBlock();
+ if (LBB == RBB)
+ return 0;
+ for(BasicBlock &BB : F->getBasicBlockList()) {
+ if (&BB == LBB) {
+ assert(&BB != RBB);
+ return -1;
+ }
+ if (&BB == RBB)
+ return 1;
+ }
+ llvm_unreachable("Basic Block Address does not point to a basic block in "
+ "its function.");
+ return -1;
+ } else {
+ // cmpValues said the functions are the same. So because they aren't
+ // literally the same pointer, they must respectively be the left and
+ // right functions.
+ assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+ // cmpValues will tell us if these are equivalent BasicBlocks, in the
+ // context of their respective functions.
+ return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+ }
}
+ default: // Unknown constant, abort.
+ DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+ llvm_unreachable("Constant ValueID not recognized.");
+ return -1;
+ }
+}
+
+/// Order two global values by the numbers GlobalNumbers reports for them.
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) {
+  // Query L strictly before R. The order in which function arguments are
+  // evaluated is unspecified in C++, so nesting both getNumber() calls in the
+  // cmpNumbers() argument list would let the compiler pick the query order.
+  // NOTE(review): this matters only if getNumber() hands out numbers lazily on
+  // first lookup -- confirm against GlobalNumberState.
+  const auto NumL = GlobalNumbers->getNumber(L);
+  const auto NumR = GlobalNumbers->getNumber(R);
+  return cmpNumbers(NumL, NumR);
+}
/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
-
PointerType *PTyL = dyn_cast<PointerType>(TyL);
PointerType *PTyR = dyn_cast<PointerType>(TyR);
@@ -642,10 +821,15 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
llvm_unreachable("Unknown type!");
// Fall through in Release mode.
case Type::IntegerTyID:
- case Type::VectorTyID:
- // TyL == TyR would have returned true earlier.
- return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
-
+ return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
+ cast<IntegerType>(TyR)->getBitWidth());
+ case Type::VectorTyID: {
+ VectorType *VTyL = cast<VectorType>(TyL), *VTyR = cast<VectorType>(TyR);
+ if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements()))
+ return Res;
+ return cmpTypes(VTyL->getElementType(), VTyR->getElementType());
+ }
+ // TyL == TyR would have returned true earlier, because types are uniqued.
case Type::VoidTyID:
case Type::FloatTyID:
case Type::DoubleTyID:
@@ -654,6 +838,7 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
case Type::PPC_FP128TyID:
case Type::LabelTyID:
case Type::MetadataTyID:
+ case Type::TokenTyID:
return 0;
case Type::PointerTyID: {
@@ -759,8 +944,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res =
cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()))
return Res;
- return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range),
- (uint64_t)cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
+ return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
+ cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
if (int Res =
@@ -783,20 +968,24 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res =
cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
return Res;
- return cmpNumbers(
- (uint64_t)CI->getMetadata(LLVMContext::MD_range),
- (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
+ if (int Res = cmpOperandBundlesSchema(CI, R))
+ return Res;
+ return cmpRangeMetadata(
+ CI->getMetadata(LLVMContext::MD_range),
+ cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
}
- if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
- if (int Res = cmpNumbers(CI->getCallingConv(),
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
+ if (int Res = cmpNumbers(II->getCallingConv(),
cast<InvokeInst>(R)->getCallingConv()))
return Res;
if (int Res =
- cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ return Res;
+ if (int Res = cmpOperandBundlesSchema(II, R))
return Res;
- return cmpNumbers(
- (uint64_t)CI->getMetadata(LLVMContext::MD_range),
- (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+ return cmpRangeMetadata(
+ II->getMetadata(LLVMContext::MD_range),
+ cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
ArrayRef<unsigned> LIndices = IVI->getIndices();
@@ -876,9 +1065,8 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
GEPR->accumulateConstantOffset(DL, OffsetR))
return cmpAPInts(OffsetL, OffsetR);
-
- if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
- (uint64_t)GEPR->getPointerOperand()->getType()))
+ if (int Res = cmpTypes(GEPL->getSourceElementType(),
+ GEPR->getSourceElementType()))
return Res;
if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
@@ -892,6 +1080,28 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
return 0;
}
+int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
+                                     const InlineAsm *R) const {
+  // The same pointer is trivially the same InlineAsm.
+  if (L == R)
+    return 0;
+
+  // Distinct pointers: walk the fields in a fixed order and let the first
+  // difference decide. Each later field is only inspected while everything
+  // before it compared equal.
+  int Order = cmpTypes(L->getFunctionType(), R->getFunctionType());
+  if (Order == 0)
+    Order = cmpMem(L->getAsmString(), R->getAsmString());
+  if (Order == 0)
+    Order = cmpMem(L->getConstraintString(), R->getConstraintString());
+  if (Order == 0)
+    Order = cmpNumbers(L->hasSideEffects(), R->hasSideEffects());
+  if (Order == 0)
+    Order = cmpNumbers(L->isAlignStack(), R->isAlignStack());
+  if (Order == 0)
+    Order = cmpNumbers(L->getDialect(), R->getDialect());
+  if (Order != 0)
+    return Order;
+
+  // Two different pointers with every field equal contradicts uniquing.
+  llvm_unreachable("InlineAsm blocks were not uniqued.");
+  return 0;
+}
+
/// Compare two values used by the two functions under pair-wise comparison. If
/// this is the first time the values are seen, they're added to the mapping so
/// that we will detect mismatches on next use.
@@ -926,7 +1136,7 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) {
const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
if (InlineAsmL && InlineAsmR)
- return cmpNumbers((uint64_t)L, (uint64_t)R);
+ return cmpInlineAsm(InlineAsmL, InlineAsmR);
if (InlineAsmL)
return 1;
if (InlineAsmR)
@@ -938,12 +1148,13 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) {
return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}
// Test whether two basic blocks have equivalent behaviour.
-int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
+int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
+ const BasicBlock *BBR) {
BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
do {
- if (int Res = cmpValues(InstL, InstR))
+ if (int Res = cmpValues(&*InstL, &*InstR))
return Res;
const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL);
@@ -961,7 +1172,7 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
if (int Res = cmpGEPs(GEPL, GEPR))
return Res;
} else {
- if (int Res = cmpOperations(InstL, InstR))
+ if (int Res = cmpOperations(&*InstL, &*InstR))
return Res;
assert(InstL->getNumOperands() == InstR->getNumOperands());
@@ -970,11 +1181,8 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
Value *OpR = InstR->getOperand(i);
if (int Res = cmpValues(OpL, OpR))
return Res;
- if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID()))
- return Res;
- // TODO: Already checked in cmpOperation
- if (int Res = cmpTypes(OpL->getType(), OpR->getType()))
- return Res;
+ // cmpValues should ensure this is true.
+ assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
}
}
@@ -990,7 +1198,6 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
// Test whether the two functions have equivalent behaviour.
int FunctionComparator::compare() {
-
sn_mapL.clear();
sn_mapR.clear();
@@ -1001,7 +1208,7 @@ int FunctionComparator::compare() {
return Res;
if (FnL->hasGC()) {
- if (int Res = cmpNumbers((uint64_t)FnL->getGC(), (uint64_t)FnR->getGC()))
+ if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
return Res;
}
@@ -1009,7 +1216,7 @@ int FunctionComparator::compare() {
return Res;
if (FnL->hasSection()) {
- if (int Res = cmpStrings(FnL->getSection(), FnR->getSection()))
+ if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
return Res;
}
@@ -1033,7 +1240,7 @@ int FunctionComparator::compare() {
ArgRI = FnR->arg_begin(),
ArgLE = FnL->arg_end();
ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
- if (cmpValues(ArgLI, ArgRI) != 0)
+ if (cmpValues(&*ArgLI, &*ArgRI) != 0)
llvm_unreachable("Arguments repeat!");
}
@@ -1055,7 +1262,7 @@ int FunctionComparator::compare() {
if (int Res = cmpValues(BBL, BBR))
return Res;
- if (int Res = compare(BBL, BBR))
+ if (int Res = cmpBasicBlocks(BBL, BBR))
return Res;
const TerminatorInst *TermL = BBL->getTerminator();
@@ -1074,6 +1281,68 @@ int FunctionComparator::compare() {
}
namespace {
+// Incremental hasher over a stream of 64-bit integers. Values are folded into
+// the running state one at a time through add(), so the caller never has to
+// gather the whole sequence into a buffer first. functionHash needs exactly
+// this, because it produces the values while traversing a function.
+class HashAccumulator64 {
+  uint64_t Hash;
+
+public:
+  // Seed with an arbitrary constant, so the state isn't zero.
+  HashAccumulator64() : Hash(0x6acaa36bef8325c5ULL) {}
+
+  // Mix V into the running state.
+  void add(uint64_t V) {
+    Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
+  }
+
+  // The whole 64-bit state is the result, so no finalization step is needed.
+  uint64_t getHash() { return Hash; }
+};
+} // end anonymous namespace
+
+// Compute a structural hash of F. The hash covers only:
+//   * whether F is varargs and how many arguments it takes,
+//   * the sequence of basic blocks, discovered from the entry block by
+//     following terminator successors with an explicit stack, and
+//   * the opcode of every instruction inside each visited block.
+// Operands are never inspected, so the hash is insensitive to things such as
+// the target of calls and the constants used in the function, which makes it
+// useful when possibly merging functions which are the same modulo constants
+// and call targets.
+FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
+  HashAccumulator64 Acc;
+  Acc.add(F.isVarArg());
+  Acc.add(F.arg_size());
+
+  SmallVector<const BasicBlock *, 8> Pending;
+  SmallSet<const BasicBlock *, 16> Seen;
+
+  // Start from the entry block; every block is queued at most once.
+  const BasicBlock *Entry = &F.getEntryBlock();
+  Pending.push_back(Entry);
+  Seen.insert(Entry);
+
+  while (!Pending.empty()) {
+    const BasicBlock *Cur = Pending.pop_back_val();
+
+    // Block header marker: without it only the flat order of opcodes would
+    // influence the hash, not how they are partitioned into blocks.
+    Acc.add(45798);
+    for (const Instruction &I : *Cur)
+      Acc.add(I.getOpcode());
+
+    // Queue every successor that has not been seen yet, in successor order.
+    const TerminatorInst *TI = Cur->getTerminator();
+    const unsigned NumSucc = TI->getNumSuccessors();
+    for (unsigned Idx = 0; Idx != NumSucc; ++Idx) {
+      const BasicBlock *Succ = TI->getSuccessor(Idx);
+      if (Seen.insert(Succ).second)
+        Pending.push_back(Succ);
+    }
+  }
+  return Acc.getHash();
+}
+
+
+namespace {
/// MergeFunctions finds functions which will generate identical machine code,
/// by considering all pointer types to be equivalent. Once identified,
@@ -1084,14 +1353,31 @@ class MergeFunctions : public ModulePass {
public:
static char ID;
MergeFunctions()
- : ModulePass(ID), HasGlobalAliases(false) {
+ : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(),
+ HasGlobalAliases(false) {
initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override;
private:
- typedef std::set<FunctionNode> FnTreeType;
+  // Comparison functor used as the ordering of FnTreeType. It is provided
+  // here, carrying the GlobalNumberState pointer itself, so that FunctionNodes
+  // do not need to become larger with another pointer.
+  class FunctionNodeCmp {
+    GlobalNumberState* GlobalNumbers;
+  public:
+    FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
+    // Returns true when LHS must be ordered before RHS.
+    bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
+      // Order first by hashes, then full function comparison.
+      if (LHS.getHash() != RHS.getHash())
+        return LHS.getHash() < RHS.getHash();
+      FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
+      // Any negative result means "LHS sorts first"; do not depend on the
+      // comparator returning exactly -1.
+      return FCmp.compare() < 0;
+    }
+  };
+ typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType;
+
+ GlobalNumberState GlobalNumbers;
/// A work queue of functions that may have been modified and should be
/// analyzed again.
@@ -1133,17 +1419,23 @@ private:
void writeAlias(Function *F, Function *G);
/// Replace function F with function G in the function tree.
- void replaceFunctionInTree(FnTreeType::iterator &IterToF, Function *G);
+ void replaceFunctionInTree(const FunctionNode &FN, Function *G);
/// The set of all distinct functions. Use the insert() and remove() methods
- /// to modify it.
+ /// to modify it. The map allows efficient lookup and deferring of Functions.
FnTreeType FnTree;
+ // Map functions to the iterators of the FunctionNode which contains them
+ // in the FnTree. This must be updated carefully whenever the FnTree is
+ // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
+ // dangling iterators into FnTree. The invariant that preserves this is that
+ // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
+ ValueMap<Function*, FnTreeType::iterator> FNodesInTree;
/// Whether or not the target supports global aliases.
bool HasGlobalAliases;
};
-} // end anonymous namespace
+} // end anonymous namespace
char MergeFunctions::ID = 0;
INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
@@ -1166,8 +1458,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
Function *F1 = cast<Function>(*I);
Function *F2 = cast<Function>(*J);
- int Res1 = FunctionComparator(F1, F2).compare();
- int Res2 = FunctionComparator(F2, F1).compare();
+ int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
+ int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare();
// If F1 <= F2, then F2 >= F1, otherwise report failure.
if (Res1 != -Res2) {
@@ -1188,8 +1480,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
continue;
Function *F3 = cast<Function>(*K);
- int Res3 = FunctionComparator(F1, F3).compare();
- int Res4 = FunctionComparator(F2, F3).compare();
+ int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare();
+ int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare();
bool Transitive = true;
@@ -1227,11 +1519,33 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
- Deferred.push_back(WeakVH(I));
+ // All functions in the module, ordered by hash. Functions with a unique
+ // hash value are easily eliminated.
+ std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
+ HashedFuncs;
+ for (Function &Func : M) {
+ if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
+ HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
+ }
}
+ std::stable_sort(
+ HashedFuncs.begin(), HashedFuncs.end(),
+ [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
+ const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
+ return a.first < b.first;
+ });
+
+ auto S = HashedFuncs.begin();
+ for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
+ // If the hash value matches the previous value or the next one, we must
+ // consider merging it. Otherwise it is dropped and never considered again.
+ if ((I != S && std::prev(I)->first == I->first) ||
+ (std::next(I) != IE && std::next(I)->first == I->first) ) {
+ Deferred.push_back(WeakVH(I->second));
+ }
+ }
+
do {
std::vector<WeakVH> Worklist;
Deferred.swap(Worklist);
@@ -1270,6 +1584,7 @@ bool MergeFunctions::runOnModule(Module &M) {
} while (!Deferred.empty());
FnTree.clear();
+ GlobalNumbers.clear();
return Changed;
}
@@ -1282,6 +1597,32 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
++UI;
CallSite CS(U->getUser());
if (CS && CS.isCallee(U)) {
+ // Transfer the called function's attributes to the call site. Due to the
+ // bitcast we will 'lose' ABI changing attributes because the 'called
+ // function' is no longer a Function* but the bitcast. Code that looks up
+ // the attributes from the called function will fail.
+
+ // FIXME: This is not actually true, at least not anymore. The callsite
+ // will always have the same ABI affecting attributes as the callee,
+ // because otherwise the original input has UB. Note that Old and New
+ // always have matching ABI, so no attributes need to be changed.
+ // Transferring other attributes may help other optimizations, but that
+ // should be done uniformly and not in this ad-hoc way.
+ auto &Context = New->getContext();
+ auto NewFuncAttrs = New->getAttributes();
+ auto CallSiteAttrs = CS.getAttributes();
+
+ CallSiteAttrs = CallSiteAttrs.addAttributes(
+ Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
+
+      // Parameter attributes live at 1-based AttributeSet indices; index 0 is
+      // AttributeSet::ReturnIndex, whose attributes were transferred just
+      // above. Map each call argument number to its attribute index so that
+      // the last parameter is covered and the return slot is not revisited.
+      for (unsigned ArgNo = 0, NumArgs = CS.arg_size(); ArgNo != NumArgs;
+           ++ArgNo) {
+        const unsigned AttrIdx = ArgNo + 1;
+        AttributeSet Attrs = NewFuncAttrs.getParamAttributes(AttrIdx);
+        if (Attrs.getNumSlots())
+          CallSiteAttrs = CallSiteAttrs.addAttributes(Context, AttrIdx, Attrs);
+      }
+
+ CS.setAttributes(CallSiteAttrs);
+
remove(CS.getInstruction()->getParent()->getParent());
U->set(BitcastNew);
}
@@ -1352,15 +1693,15 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
SmallVector<Value *, 16> Args;
unsigned i = 0;
FunctionType *FFTy = F->getFunctionType();
- for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
- AI != AE; ++AI) {
- Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i)));
+ for (Argument & AI : NewG->args()) {
+ Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
++i;
}
CallInst *CI = Builder.CreateCall(F, Args);
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
+ CI->setAttributes(F->getAttributes());
if (NewG->getReturnType()->isVoidTy()) {
Builder.CreateRetVoid();
} else {
@@ -1379,8 +1720,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
// Replace G with an alias to F and delete G.
void MergeFunctions::writeAlias(Function *F, Function *G) {
- PointerType *PTy = G->getType();
- auto *GA = GlobalAlias::create(PTy, G->getLinkage(), "", F);
+ auto *GA = GlobalAlias::create(G->getLinkage(), "", F);
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
@@ -1425,19 +1765,24 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
++NumFunctionsMerged;
}
-/// Replace function F for function G in the map.
-void MergeFunctions::replaceFunctionInTree(FnTreeType::iterator &IterToF,
+/// Replace function F by function G.
+void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
Function *G) {
- Function *F = IterToF->getFunc();
-
- // A total order is already guaranteed otherwise because we process strong
- // functions before weak functions.
- assert(((F->mayBeOverridden() && G->mayBeOverridden()) ||
- (!F->mayBeOverridden() && !G->mayBeOverridden())) &&
- "Only change functions if both are strong or both are weak");
- (void)F;
-
- IterToF->replaceBy(G);
+ Function *F = FN.getFunc();
+ assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
+ "The two functions must be equal");
+
+ auto I = FNodesInTree.find(F);
+ assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
+ assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
+
+ FnTreeType::iterator IterToFNInFnTree = I->second;
+ assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
+ // Remove F -> FN and insert G -> FN
+ FNodesInTree.erase(I);
+ FNodesInTree.insert({G, IterToFNInFnTree});
+ // Replace F with G in FN, which is stored inside the FnTree.
+ FN.replaceBy(G);
}
// Insert a ComparableFunction into the FnTree, or merge it away if equal to one
@@ -1447,6 +1792,8 @@ bool MergeFunctions::insert(Function *NewFunction) {
FnTree.insert(FunctionNode(NewFunction));
if (Result.second) {
+ assert(FNodesInTree.count(NewFunction) == 0);
+ FNodesInTree.insert({NewFunction, Result.first});
DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
return false;
}
@@ -1476,7 +1823,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
if (OldF.getFunc()->getName() > NewFunction->getName()) {
// Swap the two functions.
Function *F = OldF.getFunc();
- replaceFunctionInTree(Result.first, NewFunction);
+ replaceFunctionInTree(*Result.first, NewFunction);
NewFunction = F;
assert(OldF.getFunc() != F && "Must have swapped the functions.");
}
@@ -1495,18 +1842,13 @@ bool MergeFunctions::insert(Function *NewFunction) {
// Remove a function from FnTree. If it was already in FnTree, add
// it to Deferred so that we'll look at it in the next round.
void MergeFunctions::remove(Function *F) {
- // We need to make sure we remove F, not a function "equal" to F per the
- // function equality comparator.
- FnTreeType::iterator found = FnTree.find(FunctionNode(F));
- size_t Erased = 0;
- if (found != FnTree.end() && found->getFunc() == F) {
- Erased = 1;
- FnTree.erase(found);
- }
-
- if (Erased) {
- DEBUG(dbgs() << "Removed " << F->getName()
- << " from set and deferred it.\n");
+ auto I = FNodesInTree.find(F);
+ if (I != FNodesInTree.end()) {
+ DEBUG(dbgs() << "Deferred " << F->getName()<< ".\n");
+ FnTree.erase(I->second);
+ // I->second has been invalidated, remove it from the FNodesInTree map to
+ // preserve the invariant.
+ FNodesInTree.erase(I);
Deferred.emplace_back(F);
}
}
@@ -1516,6 +1858,8 @@ void MergeFunctions::remove(Function *F) {
void MergeFunctions::removeUsers(Value *V) {
std::vector<Value *> Worklist;
Worklist.push_back(V);
+ SmallSet<Value*, 8> Visited;
+ Visited.insert(V);
while (!Worklist.empty()) {
Value *V = Worklist.back();
Worklist.pop_back();
@@ -1526,8 +1870,10 @@ void MergeFunctions::removeUsers(Value *V) {
} else if (isa<GlobalValue>(U)) {
// do nothing
} else if (Constant *C = dyn_cast<Constant>(U)) {
- for (User *UU : C->users())
- Worklist.push_back(UU);
+        for (User *UU : C->users()) {
+          // insert().second is true only when UU had not been seen before.
+          // Queue exactly those newly discovered users; queuing on a failed
+          // insert would skip every new user and stop the traversal at the
+          // first constant.
+          if (Visited.insert(UU).second)
+            Worklist.push_back(UU);
+        }
}
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 4a7cb7b..0c5c84b 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -50,7 +50,7 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
Function* PartialInliner::unswitchFunction(Function* F) {
// First, verify that this function is an unswitching candidate...
- BasicBlock* entryBlock = F->begin();
+ BasicBlock *entryBlock = &F->front();
BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
if (!BR || BR->isUnconditional())
return nullptr;
@@ -89,18 +89,18 @@ Function* PartialInliner::unswitchFunction(Function* F) {
// of which will go outside.
BasicBlock* preReturn = newReturnBlock;
newReturnBlock = newReturnBlock->splitBasicBlock(
- newReturnBlock->getFirstNonPHI());
+ newReturnBlock->getFirstNonPHI()->getIterator());
BasicBlock::iterator I = preReturn->begin();
- BasicBlock::iterator Ins = newReturnBlock->begin();
+ Instruction *Ins = &newReturnBlock->front();
while (I != preReturn->end()) {
PHINode* OldPhi = dyn_cast<PHINode>(I);
if (!OldPhi) break;
-
- PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
+
+ PHINode *retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
OldPhi->replaceAllUsesWith(retPhi);
Ins = newReturnBlock->getFirstNonPHI();
-
- retPhi->addIncoming(I, preReturn);
+
+ retPhi->addIncoming(&*I, preReturn);
retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
newEntryBlock);
OldPhi->removeIncomingValue(newEntryBlock);
@@ -116,8 +116,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
FE = duplicateFunction->end(); FI != FE; ++FI)
if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
&*FI != newNonReturnBlock)
- toExtract.push_back(FI);
-
+ toExtract.push_back(&*FI);
+
// The CodeExtractor needs a dominator tree.
DominatorTree DT;
DT.recalculate(*duplicateFunction);
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 909baae..9876efa 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -12,19 +12,26 @@
//
//===----------------------------------------------------------------------===//
-
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm-c/Transforms/PassManagerBuilder.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Verifier.h"
+#include "llvm/IR/FunctionInfo.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
@@ -89,11 +96,21 @@ static cl::opt<bool> EnableLoopDistribute(
"enable-loop-distribute", cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental LoopDistribution Pass"));
+static cl::opt<bool> EnableNonLTOGlobalsModRef(
+ "enable-non-lto-gmr", cl::init(true), cl::Hidden,
+ cl::desc(
+ "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline."));
+
+static cl::opt<bool> EnableLoopLoadElim(
+ "enable-loop-load-elim", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopLoadElimination Pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
LibraryInfo = nullptr;
Inliner = nullptr;
+ FunctionIndex = nullptr;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
@@ -143,10 +160,9 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses(
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
if (UseCFLAA)
- PM.add(createCFLAliasAnalysisPass());
- PM.add(createTypeBasedAliasAnalysisPass());
- PM.add(createScopedNoAliasAAPass());
- PM.add(createBasicAliasAnalysisPass());
+ PM.add(createCFLAAWrapperPass());
+ PM.add(createTypeBasedAAWrapperPass());
+ PM.add(createScopedNoAliasAAWrapperPass());
}
void PassManagerBuilder::populateFunctionPassManager(
@@ -172,6 +188,9 @@ void PassManagerBuilder::populateFunctionPassManager(
void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
+ // Allow forcing function attributes as a debugging and tuning aid.
+ MPM.add(createForceFunctionAttrsLegacyPass());
+
// If all optimizations are disabled, just run the always-inline pass and,
// if enabled, the function merging pass.
if (OptLevel == 0) {
@@ -201,10 +220,15 @@ void PassManagerBuilder::populateModulePassManager(
addInitialAliasAnalysisPasses(MPM);
if (!DisableUnitAtATime) {
+ // Infer attributes about declarations if possible.
+ MPM.add(createInferFunctionAttrsLegacyPass());
+
addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
MPM.add(createIPSCCPPass()); // IP SCCP
MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
+ // Promote any localized global vars
+ MPM.add(createPromoteMemoryToRegisterPass());
MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
@@ -213,6 +237,12 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
}
+ if (EnableNonLTOGlobalsModRef)
+ // We add a module alias analysis pass here. In part due to bugs in the
+ // analysis infrastructure this "works" in that the analysis stays alive
+ // for the entire SCC pass run below.
+ MPM.add(createGlobalsAAWrapperPass());
+
// Start of CallGraph SCC passes.
if (!DisableUnitAtATime)
MPM.add(createPruneEHPass()); // Remove dead EH info
@@ -245,6 +275,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
MPM.add(createLICMPass()); // Hoist loop invariants
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
@@ -315,9 +346,42 @@ void PassManagerBuilder::populateModulePassManager(
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) {
+ // Remove avail extern fns and globals definitions if we aren't
+ // compiling an object file for later LTO. For LTO we want to preserve
+ // these so they are eligible for inlining at link-time. Note if they
+ // are unreferenced they will be removed by GlobalDCE later, so
+ // this only impacts referenced available externally globals.
+ // Eventually they will be suppressed during codegen, but eliminating
+ // here enables more opportunity for GlobalDCE as it may make
+ // globals referenced by available external functions dead
+ // and saves running remaining passes on the eliminated functions.
+ MPM.add(createEliminateAvailableExternallyPass());
+ }
+
+ if (EnableNonLTOGlobalsModRef)
+ // We add a fresh GlobalsModRef run at this point. This is particularly
+ // useful as the above will have inlined, DCE'ed, and function-attr
+ // propagated everything. We should at this point have a reasonably minimal
+ // and richly annotated call graph. By computing aliasing and mod/ref
+ // information for all local globals here, the late loop passes and notably
+ // the vectorizer will be able to use them to help recognize vectorizable
+ // memory operations.
+ //
+ // Note that this relies on a bug in the pass manager which preserves
+ // a module analysis into a function pass pipeline (and throughout it) so
+ // long as the first function pass doesn't invalidate the module analysis.
+ // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
+ // this to work. Fortunately, it is trivial to preserve AliasAnalysis
+ // (doing nothing preserves it as it is required to be conservatively
+ // correct in the face of IR changes).
+ MPM.add(createGlobalsAAWrapperPass());
+
if (RunFloat2Int)
MPM.add(createFloat2IntPass());
+ addExtensionsToPM(EP_VectorizerStart, MPM);
+
// Re-rotate loops in all our loop nests. These may have fallout out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form. Disable header duplication at -Oz.
@@ -329,6 +393,12 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopDistributePass());
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+
+ // Eliminate loads by forwarding stores from the previous iteration to loads
+ // of the current iteration.
+ if (EnableLoopLoadElim)
+ MPM.add(createLoopLoadEliminationPass());
+
// FIXME: Because of #pragma vectorize enable, the passes below are always
// inserted in the pipeline, even when the vectorizer doesn't run (ex. when
// on -O1 and no #pragma is found). Would be good to have these two passes
@@ -402,17 +472,6 @@ void PassManagerBuilder::populateModulePassManager(
// GlobalOpt already deletes dead functions and globals, at -O2 try a
// late pass of GlobalDCE. It is capable of deleting dead cycles.
if (OptLevel > 1) {
- if (!PrepareForLTO) {
- // Remove avail extern fns and globals definitions if we aren't
- // compiling an object file for later LTO. For LTO we want to preserve
- // these so they are eligible for inlining at link-time. Note if they
- // are unreferenced they will be removed by GlobalDCE below, so
- // this only impacts referenced available externally globals.
- // Eventually they will be suppressed during codegen, but eliminating
- // here enables more opportunity for GlobalDCE as it may make
- // globals referenced by available external functions dead.
- MPM.add(createEliminateAvailableExternallyPass());
- }
MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
MPM.add(createConstantMergePass()); // Merge dup global constants
}
@@ -428,13 +487,25 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
+ if (FunctionIndex)
+ PM.add(createFunctionImportPass(FunctionIndex));
+
+ // Allow forcing function attributes as a debugging and tuning aid.
+ PM.add(createForceFunctionAttrsLegacyPass());
+
+ // Infer attributes about declarations if possible.
+ PM.add(createInferFunctionAttrsLegacyPass());
+
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
PM.add(createIPSCCPPass());
// Now that we internalized some globals, see if we can hack on them!
+ PM.add(createFunctionAttrsPass()); // Add norecurse if possible.
PM.add(createGlobalOptimizerPass());
+ // Promote any localized global vars.
+ PM.add(createPromoteMemoryToRegisterPass());
// Linking modules together can lead to duplicated global constants, only
// keep one copy of each constant.
@@ -481,7 +552,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createFunctionAttrsPass()); // Add nocapture.
- PM.add(createGlobalsModRefPass()); // IP alias analysis.
+ PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
if (EnableMLSM)
@@ -500,6 +571,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createLoopVectorizePass(true, LoopVectorize));
+ // Now that we've optimized loops (in particular loop induction variables),
+ // we may have exposed more scalar opportunities. Run parts of the scalar
+ // optimizer again at this point.
+ PM.add(createInstructionCombiningPass()); // Initial cleanup
+ PM.add(createCFGSimplificationPass()); // if-convert
+ PM.add(createSCCPPass()); // Propagate exposed constants
+ PM.add(createInstructionCombiningPass()); // Clean up again
+ PM.add(createBitTrackingDCEPass());
+
// More scalar chains could be vectorized due to more alias information
if (RunSLPAfterLoopVectorization)
if (SLPVectorize)
@@ -524,6 +604,9 @@ void PassManagerBuilder::addLateLTOOptimizationPasses(
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
+ // Drop bodies of available externally objects to improve GlobalDCE.
+ PM.add(createEliminateAvailableExternallyPass());
+
// Now that we have optimized the program, discard unreachable functions.
PM.add(createGlobalDCEPass());
@@ -543,6 +626,10 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (OptLevel > 1)
addLTOOptimizationPasses(PM);
+ // Create a function that performs CFI checks for cross-DSO calls with targets
+ // in the current module.
+ PM.add(createCrossDSOCFIPass());
+
// Lower bit sets to globals. This pass supports Clang's control flow
// integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
// is enabled. The pass does nothing if CFI is disabled.
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index b2f1010..3af4afb 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -21,7 +21,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -153,21 +153,16 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// If the SCC doesn't unwind or doesn't throw, note this fact.
if (!SCCMightUnwind || !SCCMightReturn)
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- AttrBuilder NewAttributes;
-
- if (!SCCMightUnwind)
- NewAttributes.addAttribute(Attribute::NoUnwind);
- if (!SCCMightReturn)
- NewAttributes.addAttribute(Attribute::NoReturn);
-
Function *F = (*I)->getFunction();
- const AttributeSet &PAL = F->getAttributes().getFnAttributes();
- const AttributeSet &NPAL = AttributeSet::get(
- F->getContext(), AttributeSet::FunctionIndex, NewAttributes);
- if (PAL != NPAL) {
+ if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) {
+ F->addFnAttr(Attribute::NoUnwind);
+ MadeChange = true;
+ }
+
+ if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) {
+ F->addFnAttr(Attribute::NoReturn);
MadeChange = true;
- F->addAttributes(AttributeSet::FunctionIndex, NPAL);
}
}
@@ -191,9 +186,13 @@ bool PruneEH::SimplifyFunction(Function *F) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) {
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+
// Insert a call instruction before the invoke.
- CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles,
+ "", II);
Call->takeName(II);
Call->setCallingConv(II->getCallingConv());
Call->setAttributes(II->getAttributes());
@@ -233,7 +232,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
// Remove the uncond branch and add an unreachable.
BB->getInstList().pop_back();
- new UnreachableInst(BB->getContext(), BB);
+ new UnreachableInst(BB->getContext(), &*BB);
DeleteBasicBlock(New); // Delete the new BB.
MadeChange = true;
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
new file mode 100644
index 0000000..928d92e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -0,0 +1,1265 @@
+//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleProfileLoader transformation. This pass
+// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
+// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
+// profile information in the given profile.
+//
+// This pass generates branch weight annotations on the IR:
+//
+// - prof: Represents branch weights. This annotation is added to branches
+// to indicate the weights of each edge coming out of the branch.
+// The weight of each edge is the weight of the target block for
+// that edge. The weight of a block B is computed as the maximum
+// number of samples found in B.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <cctype>
+
+using namespace llvm;
+using namespace sampleprof;
+
+#define DEBUG_TYPE "sample-profile"
+
+// Command line option to specify the file to read samples from. This is
+// mainly used for debugging.
+static cl::opt<std::string> SampleProfileFile(
+ "sample-profile-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
+ "sample-profile-max-propagate-iterations", cl::init(100),
+ cl::desc("Maximum number of iterations to go through when propagating "
+ "sample block/edge weights through the CFG."));
+static cl::opt<unsigned> SampleProfileRecordCoverage(
+ "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of records in the input profile "
+ "are matched to the IR."));
+static cl::opt<unsigned> SampleProfileSampleCoverage(
+ "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of samples in the input profile "
+ "are matched to the IR."));
+static cl::opt<double> SampleProfileHotThreshold(
+ "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
+ cl::desc("Inlined functions that account for more than N% of all samples "
+ "collected in the parent function, will be inlined again."));
+static cl::opt<double> SampleProfileGlobalHotThreshold(
+ "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"),
+ cl::desc("Top-level functions that account for more than N% of all samples "
+ "collected in the profile, will be marked as hot for the inliner "
+ "to consider."));
+static cl::opt<double> SampleProfileGlobalColdThreshold(
+ "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"),
+ cl::desc("Top-level functions that account for less than N% of all samples "
+ "collected in the profile, will be marked as cold for the inliner "
+ "to consider."));
+
+namespace {
+typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
+typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap;
+typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
+typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
+typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
+ BlockEdgeMap;
+
+/// \brief Sample profile pass.
+///
+/// This pass reads profile data from the file specified by
+/// -sample-profile-file and annotates every affected function with the
+/// profile information found in that file.
+class SampleProfileLoader : public ModulePass {
+public:
+ // Class identification, replacement for typeinfo
+ static char ID;
+
+ SampleProfileLoader(StringRef Name = SampleProfileFile)
+ : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(),
+ Samples(nullptr), Filename(Name), ProfileIsValid(false),
+ TotalCollectedSamples(0) {
+ initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override;
+
+ void dump() { Reader->dump(); }
+
+ const char *getPassName() const override { return "Sample profile pass"; }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+
+protected:
+ bool runOnFunction(Function &F);
+ unsigned getFunctionLoc(Function &F);
+ bool emitAnnotations(Function &F);
+ ErrorOr<uint64_t> getInstWeight(const Instruction &I) const;
+ ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const;
+ const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
+ const FunctionSamples *findFunctionSamples(const Instruction &I) const;
+ bool inlineHotFunctions(Function &F);
+ bool emitInlineHints(Function &F);
+ void printEdgeWeight(raw_ostream &OS, Edge E);
+ void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
+ void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
+ bool computeBlockWeights(Function &F);
+ void findEquivalenceClasses(Function &F);
+ void findEquivalencesFor(BasicBlock *BB1,
+ SmallVector<BasicBlock *, 8> Descendants,
+ DominatorTreeBase<BasicBlock> *DomTree);
+ void propagateWeights(Function &F);
+ uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
+ void buildEdges(Function &F);
+ bool propagateThroughEdges(Function &F);
+ void computeDominanceAndLoopInfo(Function &F);
+ unsigned getOffset(unsigned L, unsigned H) const;
+ void clearFunctionData();
+
+ /// \brief Map basic blocks to their computed weights.
+ ///
+ /// The weight of a basic block is defined to be the maximum
+ /// of all the instruction weights in that block.
+ BlockWeightMap BlockWeights;
+
+ /// \brief Map edges to their computed weights.
+ ///
+ /// Edge weights are computed by propagating basic block weights in
+ /// SampleProfileLoader::propagateWeights.
+ EdgeWeightMap EdgeWeights;
+
+ /// \brief Set of visited blocks during propagation.
+ SmallPtrSet<const BasicBlock *, 128> VisitedBlocks;
+
+ /// \brief Set of visited edges during propagation.
+ SmallSet<Edge, 128> VisitedEdges;
+
+ /// \brief Equivalence classes for block weights.
+ ///
+ /// Two blocks BB1 and BB2 are in the same equivalence class if they
+ /// dominate and post-dominate each other, and they are in the same loop
+ /// nest. When this happens, the two blocks are guaranteed to execute
+ /// the same number of times.
+ EquivalenceClassMap EquivalenceClass;
+
+ /// \brief Dominance, post-dominance and loop information.
+ std::unique_ptr<DominatorTree> DT;
+ std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
+ std::unique_ptr<LoopInfo> LI;
+
+ /// \brief Predecessors for each basic block in the CFG.
+ BlockEdgeMap Predecessors;
+
+ /// \brief Successors for each basic block in the CFG.
+ BlockEdgeMap Successors;
+
+ /// \brief Profile reader object.
+ std::unique_ptr<SampleProfileReader> Reader;
+
+ /// \brief Samples collected for the body of this function.
+ FunctionSamples *Samples;
+
+ /// \brief Name of the profile file to load.
+ StringRef Filename;
+
+ /// \brief Flag indicating whether the profile input loaded successfully.
+ bool ProfileIsValid;
+
+ /// \brief Total number of samples collected in this profile.
+ ///
+ /// This is the sum of all the samples collected in all the functions executed
+ /// at runtime.
+ uint64_t TotalCollectedSamples;
+};
+
+class SampleCoverageTracker {
+public:
+ SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
+
+ bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
+ uint32_t Discriminator, uint64_t Samples);
+ unsigned computeCoverage(unsigned Used, unsigned Total) const;
+ unsigned countUsedRecords(const FunctionSamples *FS) const;
+ unsigned countBodyRecords(const FunctionSamples *FS) const;
+ uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
+ uint64_t countBodySamples(const FunctionSamples *FS) const;
+ void clear() {
+ SampleCoverage.clear();
+ TotalUsedSamples = 0;
+ }
+
+private:
+ typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
+ typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
+ FunctionSamplesCoverageMap;
+
+ /// Coverage map for sampling records.
+ ///
+ /// This map keeps a record of sampling records that have been matched to
+ /// an IR instruction. This is used to detect some form of staleness in
+ /// profiles (see flags -sample-profile-check-{record,sample}-coverage).
+ ///
+ /// Each entry in the map corresponds to a FunctionSamples instance. This is
+ /// another map that counts how many times the sample record at the
+ /// given location has been used.
+ FunctionSamplesCoverageMap SampleCoverage;
+
+ /// Number of samples used from the profile.
+ ///
+ /// When a sampling record is used for the first time, the samples from
+ /// that record are added to this accumulator. Coverage is later computed
+ /// based on the total number of samples available in this function and
+ /// its callsites.
+ ///
+ /// Note that this accumulator tracks samples used from a single function
+ /// and all the inlined callsites. Strictly, we should have a map of counters
+ /// keyed by FunctionSamples pointers, but these stats are cleared after
+ /// every function, so we just need to keep a single counter.
+ uint64_t TotalUsedSamples;
+};
+
+SampleCoverageTracker CoverageTracker;
+
+/// Return true if the given callsite is hot with respect to its caller.
+///
+/// Functions that were inlined in the original binary will be represented
+/// in the inline stack in the sample profile. If the profile shows that
+/// the original inline decision was "good" (i.e., the callsite is executed
+/// frequently), then we will recreate the inline decision and apply the
+/// profile from the inlined callsite.
+///
+/// To decide whether an inlined callsite is hot, we compute the fraction
+/// of samples used by the callsite with respect to the total number of samples
+/// collected in the caller.
+///
+/// If that fraction, expressed as a percentage, is at least
+/// SampleProfileHotThreshold, the callsite will be inlined again.
+bool callsiteIsHot(const FunctionSamples *CallerFS,
+ const FunctionSamples *CallsiteFS) {
+ if (!CallsiteFS)
+ return false; // The callsite was not inlined in the original binary.
+
+ uint64_t ParentTotalSamples = CallerFS->getTotalSamples();
+ if (ParentTotalSamples == 0)
+ return false; // Avoid division by zero.
+
+ uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
+ if (CallsiteTotalSamples == 0)
+ return false; // Callsite is trivially cold.
+
+ double PercentSamples =
+ (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
+ return PercentSamples >= SampleProfileHotThreshold;
+}
+
+}
+
+/// Mark as used the sample record for the given function samples at
+/// (LineOffset, Discriminator).
+///
+/// \returns true if this is the first time we mark the given record.
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
+ uint32_t LineOffset,
+ uint32_t Discriminator,
+ uint64_t Samples) {
+ LineLocation Loc(LineOffset, Discriminator);
+ unsigned &Count = SampleCoverage[FS][Loc];
+ bool FirstTime = (++Count == 1);
+ if (FirstTime)
+ TotalUsedSamples += Samples;
+ return FirstTime;
+}
+
+/// Return the number of sample records that were applied from this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
+ auto I = SampleCoverage.find(FS);
+
+ // The size of the coverage map for FS represents the number of records
+ // that were marked used at least once.
+ unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
+
+ // If there are inlined callsites in this function, count the used records
+ // found in their bodies as well. Only hot callsites are counted; this also
+ // skips callees with 0 total samples, which were never invoked at runtime.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countUsedRecords(CalleeSamples);
+ }
+
+ return Count;
+}
+
+/// Return the number of sample records in the body of this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
+ unsigned Count = FS->getBodySamples().size();
+
+ // Only count records in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countBodyRecords(CalleeSamples);
+ }
+
+ return Count;
+}
+
+/// Return the number of samples collected in the body of this profile.
+///
+/// This count does not include samples from cold inlined callsites.
+uint64_t
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
+ uint64_t Total = 0;
+ for (const auto &I : FS->getBodySamples())
+ Total += I.second.getSamples();
+
+ // Only count samples in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Total += countBodySamples(CalleeSamples);
+ }
+
+ return Total;
+}
+
+/// Return the fraction of sample records used in this profile.
+///
+/// The returned value is an unsigned integer in the range 0-100 indicating
+/// the percentage of sample records that were used while applying this
+/// profile to the associated function.
+unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
+ unsigned Total) const {
+ assert(Used <= Total &&
+ "number of used records cannot exceed the total number of records");
+ return Total > 0 ? Used * 100 / Total : 100;
+}
+
+/// Clear all the per-function data used to load samples and propagate weights.
+void SampleProfileLoader::clearFunctionData() {
+ BlockWeights.clear();
+ EdgeWeights.clear();
+ VisitedBlocks.clear();
+ VisitedEdges.clear();
+ EquivalenceClass.clear();
+ DT = nullptr;
+ PDT = nullptr;
+ LI = nullptr;
+ Predecessors.clear();
+ Successors.clear();
+ CoverageTracker.clear();
+}
+
+/// \brief Returns the offset of lineno \p L to head_lineno \p H
+///
+/// \param L Lineno
+/// \param H Header lineno of the function
+///
+/// \returns offset to the header lineno. 16 bits are used to represent offset.
+/// We assume that a single function will not exceed 65535 LOC.
+unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
+ return (L - H) & 0xffff;
+}
+
+/// \brief Print the weight of edge \p E on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param E Edge to print.
+void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
+ OS << "weight[" << E.first->getName() << "->" << E.second->getName()
+ << "]: " << EdgeWeights[E] << "\n";
+}
+
+/// \brief Print the equivalence class of block \p BB on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param BB Block to print.
+void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
+ const BasicBlock *BB) {
+ const BasicBlock *Equiv = EquivalenceClass[BB];
+ OS << "equivalence[" << BB->getName()
+ << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
+}
+
+/// \brief Print the weight of block \p BB on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param BB Block to print.
+void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
+ const BasicBlock *BB) const {
+ const auto &I = BlockWeights.find(BB);
+ uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
+ OS << "weight[" << BB->getName() << "]: " << W << "\n";
+}
+
+/// \brief Get the weight for an instruction.
+///
+/// The "weight" of an instruction \p Inst is the number of samples
+/// collected on that instruction at runtime. To retrieve it, we
+/// need to compute the line number of \p Inst relative to the start of its
+/// function. We use HeaderLineno to compute the offset. We then
+/// look up the samples collected for \p Inst using BodySamples.
+///
+/// \param Inst Instruction to query.
+///
+/// \returns the weight of \p Inst.
+ErrorOr<uint64_t>
+SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
+ DebugLoc DLoc = Inst.getDebugLoc();
+ if (!DLoc)
+ return std::error_code();
+
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (!FS)
+ return std::error_code();
+
+ const DILocation *DIL = DLoc;
+ unsigned Lineno = DLoc.getLine();
+ unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
+
+ uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
+ uint32_t Discriminator = DIL->getDiscriminator();
+ ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
+ if (R) {
+ bool FirstMark =
+ CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
+ if (FirstMark) {
+ const Function *F = Inst.getParent()->getParent();
+ LLVMContext &Ctx = F->getContext();
+ emitOptimizationRemark(
+ Ctx, DEBUG_TYPE, *F, DLoc,
+ Twine("Applied ") + Twine(*R) + " samples from profile (offset: " +
+ Twine(LineOffset) +
+ ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")");
+ }
+ DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":"
+ << Inst << " (line offset: " << Lineno - HeaderLineno << "."
+ << DIL->getDiscriminator() << " - weight: " << R.get()
+ << ")\n");
+ }
+ return R;
+}
+
+/// \brief Compute the weight of a basic block.
+///
+/// The weight of basic block \p BB is the maximum weight of all the
+/// instructions in BB.
+///
+/// \param BB The basic block to query.
+///
+/// \returns the weight for \p BB.
+ErrorOr<uint64_t>
+SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
+ bool Found = false;
+ uint64_t Weight = 0;
+ for (auto &I : BB->getInstList()) {
+ const ErrorOr<uint64_t> &R = getInstWeight(I);
+ if (R && R.get() >= Weight) {
+ Weight = R.get();
+ Found = true;
+ }
+ }
+ if (Found)
+ return Weight;
+ else
+ return std::error_code();
+}
+
+/// \brief Compute and store the weights of every basic block.
+///
+/// This populates the BlockWeights map by computing
+/// the weights of every basic block in the CFG.
+///
+/// \param F The function to query.
+bool SampleProfileLoader::computeBlockWeights(Function &F) {
+ bool Changed = false;
+ DEBUG(dbgs() << "Block weights\n");
+ for (const auto &BB : F) {
+ ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
+ if (Weight) {
+ BlockWeights[&BB] = Weight.get();
+ VisitedBlocks.insert(&BB);
+ Changed = true;
+ }
+ DEBUG(printBlockWeight(dbgs(), &BB));
+ }
+
+ return Changed;
+}
+
+/// \brief Get the FunctionSamples for a call instruction.
+///
+/// The FunctionSamples of a call instruction \p Inst is the inlined
+/// instance that the call instruction calls into. It contains
+/// all samples that reside in the inlined instance. We first find the
+/// inlined instance in which the call instruction is from, then we
+/// traverse its children to find the callsite with the matching
+/// location and callee function name.
+///
+/// \param Inst Call instruction to query.
+///
+/// \returns The FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
+ const DILocation *DIL = Inst.getDebugLoc();
+ if (!DIL) {
+ return nullptr;
+ }
+ DISubprogram *SP = DIL->getScope()->getSubprogram();
+ if (!SP)
+ return nullptr;
+
+ Function *CalleeFunc = Inst.getCalledFunction();
+ if (!CalleeFunc) {
+ return nullptr;
+ }
+
+ StringRef CalleeName = CalleeFunc->getName();
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (FS == nullptr)
+ return nullptr;
+
+ return FS->findFunctionSamplesAt(
+ CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+ DIL->getDiscriminator(), CalleeName));
+}
+
+/// \brief Get the FunctionSamples for an instruction.
+///
+/// The FunctionSamples of an instruction \p Inst is the inlined instance
+/// from which that instruction comes. We traverse the inline stack
+/// of that instruction, and match it with the tree nodes in the profile.
+///
+/// \param Inst Instruction to query.
+///
+/// \returns the FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
+ SmallVector<CallsiteLocation, 10> S;
+ const DILocation *DIL = Inst.getDebugLoc();
+ if (!DIL) {
+ return Samples;
+ }
+ StringRef CalleeName;
+ for (const DILocation *DIL = Inst.getDebugLoc(); DIL;
+ DIL = DIL->getInlinedAt()) {
+ DISubprogram *SP = DIL->getScope()->getSubprogram();
+ if (!SP)
+ return nullptr;
+ if (!CalleeName.empty()) {
+ S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+ DIL->getDiscriminator(), CalleeName));
+ }
+ CalleeName = SP->getLinkageName();
+ }
+ if (S.size() == 0)
+ return Samples;
+ const FunctionSamples *FS = Samples;
+ for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
+ FS = FS->findFunctionSamplesAt(S[i]);
+ }
+ return FS;
+}
+
+/// \brief Emit an inline hint if \p F is globally hot or cold.
+///
+/// If \p F consumes a significant fraction of samples (indicated by
+/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the
+/// inliner to consider the function hot.
+///
+/// If \p F consumes a small fraction of samples (indicated by
+/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner
+/// to consider the function cold.
+///
+/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a
+/// function globally hot or cold, we should be annotating individual callsites.
+/// This is not currently possible, but work on the inliner will eventually
+/// provide this ability. See http://reviews.llvm.org/D15003 for details and
+/// discussion.
+///
+/// \returns True if either attribute was applied to \p F.
+bool SampleProfileLoader::emitInlineHints(Function &F) {
+ if (TotalCollectedSamples == 0) // Guards the division below.
+ return false;
+
+ uint64_t FunctionSamples = Samples->getTotalSamples();
+ double SamplesPercent =
+ (double)FunctionSamples / (double)TotalCollectedSamples * 100.0;
+
+ // If the function's share of all collected samples (a percentage) is at
+ // or above the hot threshold, mark it globally hot.
+ if (SamplesPercent >= SampleProfileGlobalHotThreshold) {
+ F.addFnAttr(llvm::Attribute::InlineHint);
+ std::string Msg;
+ raw_string_ostream S(Msg);
+ S << "Applied inline hint to globally hot function '" << F.getName()
+ << "' with " << format("%.2f", SamplesPercent)
+ << "% of samples (threshold: "
+ << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)";
+ S.flush();
+ emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+ return true;
+ }
+
+ // If the function's share of all collected samples (a percentage) is at
+ // or below the cold threshold, mark it globally cold.
+ if (SamplesPercent <= SampleProfileGlobalColdThreshold) {
+ F.addFnAttr(llvm::Attribute::Cold);
+ std::string Msg;
+ raw_string_ostream S(Msg);
+ S << "Applied cold hint to globally cold function '" << F.getName()
+ << "' with " << format("%.2f", SamplesPercent)
+ << "% of samples (threshold: "
+ << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)";
+ S.flush();
+ emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+ return true;
+ }
+
+ return false;
+}
+
+/// \brief Iteratively inline hot callsites of a function.
+///
+/// Repeatedly traverse all callsites of the function \p F and check whether
+/// the corresponding inlined instance exists and is hot in the profile. Each
+/// hot callsite is inlined, which adds the callee's callsites to the caller,
+/// and the scan restarts until a full pass inlines nothing.
+///
+/// TODO: investigate the possibility of not invoking InlineFunction directly.
+///
+/// \param F function to perform iterative inlining.
+///
+/// \returns True if any callsite was inlined.
+bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+ bool Changed = false;
+ LLVMContext &Ctx = F.getContext();
+ while (true) {
+ bool LocalChanged = false;
+ SmallVector<CallInst *, 10> CIS;
+ for (auto &BB : F) {
+ for (auto &I : BB.getInstList()) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
+ CIS.push_back(CI);
+ }
+ }
+ for (auto CI : CIS) {
+ InlineFunctionInfo IFI;
+ Function *CalledFunction = CI->getCalledFunction();
+ DebugLoc DLoc = CI->getDebugLoc();
+ uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
+ if (InlineFunction(CI, IFI)) {
+ LocalChanged = true;
+ emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
+ Twine("inlined hot callee '") +
+ CalledFunction->getName() + "' with " +
+ Twine(NumSamples) + " samples into '" +
+ F.getName() + "'");
+ }
+ }
+ if (LocalChanged) {
+ Changed = true;
+ } else {
+ break;
+ }
+ }
+ return Changed;
+}
+
+/// \brief Find equivalence classes for the given block.
+///
+/// This finds all the blocks that are guaranteed to execute the same
+/// number of times as \p BB1. To do this, it traverses all the
+/// descendants of \p BB1 in the dominator or post-dominator tree.
+///
+/// A block BB2 will be in the same equivalence class as \p BB1 if
+/// the following holds:
+///
+/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
+/// is a descendant of \p BB1 in the dominator tree, then BB2 should
+/// dominate BB1 in the post-dominator tree.
+///
+/// 2- Both BB2 and \p BB1 must be in the same loop.
+///
+/// For every block BB2 that meets those two requirements, we set BB2's
+/// equivalence class to that of \p BB1.
+///
+/// \param BB1 Block to check.
+/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
+/// \param DomTree Opposite dominator tree. If \p Descendants is filled
+/// with blocks from \p BB1's dominator tree, then
+/// this is the post-dominator tree, and vice versa.
+void SampleProfileLoader::findEquivalencesFor(
+ BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
+ DominatorTreeBase<BasicBlock> *DomTree) {
+ const BasicBlock *EC = EquivalenceClass[BB1];
+ uint64_t Weight = BlockWeights[EC];
+ for (const auto *BB2 : Descendants) {
+ bool IsDomParent = DomTree->dominates(BB2, BB1);
+ bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
+ if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
+ EquivalenceClass[BB2] = EC;
+
+ // If BB2 is heavier than the class representative, remember BB2's
+ // weight so it can be assigned to the representative below.
+ //
+ // Note that we don't worry about the opposite situation here
+ // (when BB2 is lighter than BB1). We will deal with this
+ // during the propagation phase. Right now, we just want to
+ // make sure that BB1 has the largest weight of all the
+ // members of its equivalence set.
+ Weight = std::max(Weight, BlockWeights[BB2]);
+ }
+ }
+ BlockWeights[EC] = Weight;
+}
+
+/// \brief Find equivalence classes.
+///
+/// Since samples may be missing from blocks, we can fill in the gaps by setting
+/// the weights of all the blocks in the same equivalence class to the same
+/// weight. To compute the concept of equivalence, we use dominance and loop
+/// information. Two blocks B1 and B2 are in the same equivalence class if B1
+/// dominates B2, B2 post-dominates B1 and both are in the same loop.
+///
+/// \param F The function to query.
+void SampleProfileLoader::findEquivalenceClasses(Function &F) {
+ SmallVector<BasicBlock *, 8> DominatedBBs;
+ DEBUG(dbgs() << "\nBlock equivalence classes\n");
+ // Find equivalence sets based on dominance and post-dominance information.
+ for (auto &BB : F) {
+ BasicBlock *BB1 = &BB;
+
+ // Compute BB1's equivalence class once.
+ if (EquivalenceClass.count(BB1)) {
+ DEBUG(printBlockEquivalence(dbgs(), BB1));
+ continue;
+ }
+
+ // By default, blocks are in their own equivalence class.
+ EquivalenceClass[BB1] = BB1;
+
+ // Traverse all the blocks dominated by BB1. We are looking for
+ // every basic block BB2 such that:
+ //
+ // 1- BB1 dominates BB2.
+ // 2- BB2 post-dominates BB1.
+ // 3- BB1 and BB2 are in the same loop.
+ //
+ // If all those conditions hold, it means that BB2 is executed
+ // as many times as BB1, so they are placed in the same equivalence
+ // class by making BB2's equivalence class be BB1.
+ DominatedBBs.clear();
+ DT->getDescendants(BB1, DominatedBBs);
+ findEquivalencesFor(BB1, DominatedBBs, PDT.get());
+
+ DEBUG(printBlockEquivalence(dbgs(), BB1));
+ }
+
+ // Assign weights to equivalence classes.
+ //
+ // All the basic blocks in the same equivalence class will execute
+ // the same number of times. Since we know that the head block in
+ // each equivalence class has the largest weight, assign that weight
+ // to all the blocks in that equivalence class.
+ DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
+ for (auto &BI : F) {
+ const BasicBlock *BB = &BI;
+ const BasicBlock *EquivBB = EquivalenceClass[BB];
+ if (BB != EquivBB)
+ BlockWeights[BB] = BlockWeights[EquivBB];
+ DEBUG(printBlockWeight(dbgs(), BB));
+ }
+}
+
+/// \brief Visit the given edge to decide if it has a valid weight.
+///
+/// If \p E has not been visited before, we copy it to \p UnknownEdge
+/// and increment the count of unknown edges.
+///
+/// \param E Edge to visit.
+/// \param NumUnknownEdges Current number of unknown edges.
+/// \param UnknownEdge Set to \p E if \p E has not been visited before.
+///
+/// \returns E's weight, if known. Otherwise, return 0.
+uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
+ Edge *UnknownEdge) {
+ if (!VisitedEdges.count(E)) {
+ (*NumUnknownEdges)++;
+ *UnknownEdge = E;
+ return 0;
+ }
+
+ return EdgeWeights[E];
+}
+
+/// \brief Propagate weights through incoming/outgoing edges.
+///
+/// If the weight of a basic block is known, and there is only one edge
+/// with an unknown weight, we can calculate the weight of that edge.
+///
+/// Similarly, if all the edges have a known count, we can calculate the
+/// count of the basic block, if needed.
+///
+/// \param F Function to process.
+///
+/// \returns True if new weights were assigned to edges or blocks.
+bool SampleProfileLoader::propagateThroughEdges(Function &F) {
+ bool Changed = false;
+ DEBUG(dbgs() << "\nPropagation through edges\n");
+ for (const auto &BI : F) {
+ const BasicBlock *BB = &BI;
+ const BasicBlock *EC = EquivalenceClass[BB];
+
+ // Visit all the predecessor and successor edges to determine
+ // which ones have a weight assigned already. Note that it doesn't
+ // matter that we only keep track of a single unknown edge. The
+ // only case we are interested in handling is when only a single
+ // edge is unknown (see setEdgeOrBlockWeight).
+ for (unsigned i = 0; i < 2; i++) {
+ uint64_t TotalWeight = 0;
+ unsigned NumUnknownEdges = 0;
+ Edge UnknownEdge, SelfReferentialEdge;
+
+ if (i == 0) {
+ // First, visit all predecessor edges.
+ for (auto *Pred : Predecessors[BB]) {
+ Edge E = std::make_pair(Pred, BB);
+ TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+ if (E.first == E.second)
+ SelfReferentialEdge = E;
+ }
+ } else {
+ // On the second round, visit all successor edges.
+ for (auto *Succ : Successors[BB]) {
+ Edge E = std::make_pair(BB, Succ);
+ TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+ }
+ }
+
+ // After visiting all the edges, there are three cases that we
+ // can handle immediately:
+ //
+ // - All the edge weights are known (i.e., NumUnknownEdges == 0).
+ // In this case, we compare the sum of all the edges with the
+ // weight of BB's class. If the sum is larger, we raise that weight
+ // to match. Additionally, if the class had not been visited before,
+ // we mark it visited.
+ //
+ // - Only one edge is unknown and BB has already been visited.
+ // In this case, we can compute the weight of the edge by
+ // subtracting the sum of all the known edge weights from the
+ // block weight. If the edges weigh more than BB, then the
+ // weight of the last remaining edge is set to zero.
+ //
+ // - There exists a self-referential edge and the weight of BB is
+ // known. In this case, this edge can be based on BB's weight.
+ // We add up all the other known edges and set the weight on
+ // the self-referential edge as we did in the previous case.
+ //
+ // In any other case, we must continue iterating. Eventually,
+ // all edges will get a weight, or iteration will stop when
+ // it reaches SampleProfileMaxPropagateIterations.
+ if (NumUnknownEdges <= 1) {
+ uint64_t &BBWeight = BlockWeights[EC];
+ if (NumUnknownEdges == 0) {
+ // If we already know the weight of all edges, the weight of the
+ // basic block can be computed. It should be no smaller than the sum
+ // of all edge weights.
+ if (TotalWeight > BBWeight) {
+ BBWeight = TotalWeight;
+ Changed = true;
+ DEBUG(dbgs() << "All edge weights for " << BB->getName()
+ << " known. Set weight for block: ";
+ printBlockWeight(dbgs(), BB););
+ }
+ if (VisitedBlocks.insert(EC).second)
+ Changed = true;
+ } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
+ // If there is a single unknown edge and the block has been
+ // visited, then we can compute E's weight.
+ if (BBWeight >= TotalWeight)
+ EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
+ else
+ EdgeWeights[UnknownEdge] = 0;
+ VisitedEdges.insert(UnknownEdge);
+ Changed = true;
+ DEBUG(dbgs() << "Set weight for edge: ";
+ printEdgeWeight(dbgs(), UnknownEdge));
+ }
+ } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
+ uint64_t &BBWeight = BlockWeights[BB];
+ // Self-referential edge, class already visited. NOTE(review): uses BlockWeights[BB], not [EC].
+ if (BBWeight >= TotalWeight)
+ EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
+ else
+ EdgeWeights[SelfReferentialEdge] = 0;
+ VisitedEdges.insert(SelfReferentialEdge);
+ Changed = true;
+ DEBUG(dbgs() << "Set self-referential edge weight to: ";
+ printEdgeWeight(dbgs(), SelfReferentialEdge));
+ }
+ }
+ }
+
+ return Changed;
+}
+
+/// \brief Build the Predecessors/Successors edge lists for every block in \p F.
+///
+/// Only unique edges are recorded: multiple B1->B2 edges are added once. Both
+/// lists of each block must be empty on entry (a stale list is unreachable).
+void SampleProfileLoader::buildEdges(Function &F) {
+ for (auto &BI : F) {
+ BasicBlock *B1 = &BI;
+
+ // Add predecessors for B1.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ if (!Predecessors[B1].empty())
+ llvm_unreachable("Found a stale predecessors list in a basic block.");
+ for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
+ BasicBlock *B2 = *PI;
+ if (Visited.insert(B2).second)
+ Predecessors[B1].push_back(B2);
+ }
+
+ // Add successors for B1.
+ Visited.clear();
+ if (!Successors[B1].empty())
+ llvm_unreachable("Found a stale successors list in a basic block.");
+ for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
+ BasicBlock *B2 = *SI;
+ if (Visited.insert(B2).second)
+ Successors[B1].push_back(B2);
+ }
+ }
+}
+
+/// \brief Propagate weights into edges and emit branch weight metadata.
+///
+/// The following rules are applied to every block BB in the CFG:
+///
+/// - If BB has a single predecessor/successor, then the weight
+/// of that edge is the weight of the block.
+///
+/// - If all incoming or outgoing edges are known except one, and the
+/// weight of the block is already known, the weight of the unknown
+/// edge will be the weight of the block minus the sum of all the known
+/// edges. If the sum of all the known edges is larger than BB's weight,
+/// we set the unknown edge weight to zero.
+///
+/// - If there is a self-referential edge, and the weight of the block is
+/// known, the weight for that edge is set to the weight of the block
+/// minus the weight of the other incoming edges to that block (if
+/// known).
+void SampleProfileLoader::propagateWeights(Function &F) {
+ bool Changed = true;
+ unsigned I = 0;
+
+ // Add an entry count to the function using the samples gathered
+ // at the function entry.
+ F.setEntryCount(Samples->getHeadSamples());
+
+ // Before propagation starts, build, for each block, a list of
+ // unique predecessors and successors. This is necessary to handle
+ // identical edges in multiway branches. Since we visit all blocks and all
+ // edges of the CFG, it is cleaner to build these lists once at the start
+ // of the pass.
+ buildEdges(F);
+
+ // Propagate until we converge or we go past the iteration limit.
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F);
+ }
+
+ // Generate MD_prof metadata for every branch instruction using the
+ // edge weights computed during propagation.
+ DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
+ LLVMContext &Ctx = F.getContext();
+ MDBuilder MDB(Ctx);
+ for (auto &BI : F) {
+ BasicBlock *BB = &BI;
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 1)
+ continue;
+ if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ continue;
+
+ DEBUG(dbgs() << "\nGetting weights for branch at line "
+ << TI->getDebugLoc().getLine() << ".\n");
+ SmallVector<uint32_t, 4> Weights;
+ uint32_t MaxWeight = 0;
+ DebugLoc MaxDestLoc;
+ for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = TI->getSuccessor(I);
+ Edge E = std::make_pair(BB, Succ);
+ uint64_t Weight = EdgeWeights[E];
+ DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
+ // Use uint32_t saturated arithmetic to adjust the incoming weights,
+ // if needed. Sample counts in profiles are 64-bit unsigned values,
+ // but internally branch weights are expressed as 32-bit values.
+ if (Weight > std::numeric_limits<uint32_t>::max()) {
+ DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
+ Weight = std::numeric_limits<uint32_t>::max();
+ }
+ Weights.push_back(static_cast<uint32_t>(Weight));
+ if (Weight != 0) {
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
+ }
+ }
+ }
+
+ // Only set weights if there is at least one non-zero weight.
+ // In any other case, let the analyzer set weights.
+ if (MaxWeight > 0) {
+ DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
+ TI->setMetadata(llvm::LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
+ DebugLoc BranchLoc = TI->getDebugLoc();
+ emitOptimizationRemark(
+ Ctx, DEBUG_TYPE, F, MaxDestLoc,
+ Twine("most popular destination for conditional branches at ") +
+ ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
+ Twine(BranchLoc.getLine()) + ":" +
+ Twine(BranchLoc.getCol()))
+ : Twine("<UNKNOWN LOCATION>")));
+ } else {
+ DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+ }
+ }
+}
+
+/// \brief Get the line number for the function header.
+///
+/// This looks up function \p F in the current compilation unit and
+/// retrieves the line number where the function is defined. This is
+/// line 0 for all the samples read from the profile file. Every line
+/// number is relative to this line.
+///
+/// \param F Function object to query.
+///
+/// \returns the line number where \p F is defined, or 0 (after emitting a
+/// warning diagnostic) when no DISubprogram is found for \p F.
+unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
+ if (DISubprogram *S = getDISubprogram(&F))
+ return S->getLine();
+
+ // If the start of \p F is missing, emit a diagnostic to inform the user
+ // about the missed opportunity.
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ "No debug information found in function " + F.getName() +
+ ": Function profile not used",
+ DS_Warning));
+ return 0;
+}
+
+void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
+ DT.reset(new DominatorTree); // Fresh dominator tree, recomputed for F below.
+ DT->recalculate(F);
+
+ PDT.reset(new DominatorTreeBase<BasicBlock>(true)); // 'true' presumably selects the post-dominator form - confirm.
+ PDT->recalculate(F);
+
+ LI.reset(new LoopInfo); // Loop info is analyzed from the dominator tree DT.
+ LI->analyze(*DT);
+}
+
+/// \brief Generate branch weight metadata for all branches in \p F.
+///
+/// Branch weights are computed out of instruction samples using a
+/// propagation heuristic. Propagation proceeds in 3 phases:
+///
+/// 1- Assignment of block weights. All the basic blocks in the function
+/// are initially assigned the same weight as their most frequently
+/// executed instruction.
+///
+/// 2- Creation of equivalence classes. Since samples may be missing from
+/// blocks, we can fill in the gaps by setting the weights of all the
+/// blocks in the same equivalence class to the same weight. To compute
+/// the concept of equivalence, we use dominance and loop information.
+/// Two blocks B1 and B2 are in the same equivalence class if B1
+/// dominates B2, B2 post-dominates B1 and both are in the same loop.
+///
+/// 3- Propagation of block weights into edges. This uses a simple
+/// propagation heuristic. The following rules are applied to every
+/// block BB in the CFG:
+///
+/// - If BB has a single predecessor/successor, then the weight
+/// of that edge is the weight of the block.
+///
+/// - If all the edges are known except one, and the weight of the
+/// block is already known, the weight of the unknown edge will
+/// be the weight of the block minus the sum of all the known
+/// edges. If the sum of all the known edges is larger than BB's weight,
+/// we set the unknown edge weight to zero.
+///
+/// - If there is a self-referential edge, and the weight of the block is
+/// known, the weight for that edge is set to the weight of the block
+/// minus the weight of the other incoming edges to that block (if
+/// known).
+///
+/// Since this propagation is not guaranteed to finalize for every CFG, we
+/// only allow it to proceed for a limited number of iterations (controlled
+/// by -sample-profile-max-propagate-iterations).
+///
+/// FIXME: Try to replace this propagation heuristic with a scheme
+/// that is guaranteed to finalize. A work-list approach similar to
+/// the standard value propagation algorithm used by SSA-CCP might
+/// work here.
+///
+/// Once all the branch weights are computed, we emit the MD_prof
+/// metadata on BB using the computed values for each of its branches.
+///
+/// \param F The function to query.
+///
+/// \returns true if \p F was modified. Returns false, otherwise.
+bool SampleProfileLoader::emitAnnotations(Function &F) {
+ bool Changed = false;
+
+ if (getFunctionLoc(F) == 0)
+ return false;
+
+ DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
+ << ": " << getFunctionLoc(F) << "\n");
+
+ Changed |= emitInlineHints(F);
+
+ Changed |= inlineHotFunctions(F);
+
+ // Compute basic block weights.
+ Changed |= computeBlockWeights(F);
+
+ if (Changed) {
+ // Compute dominance and loop info needed for propagation.
+ computeDominanceAndLoopInfo(F);
+
+ // Find equivalence classes.
+ findEquivalenceClasses(F);
+
+ // Propagate weights to all edges.
+ propagateWeights(F);
+ }
+
+ // If coverage checking was requested, compute it now.
+ if (SampleProfileRecordCoverage) {
+ unsigned Used = CoverageTracker.countUsedRecords(Samples);
+ unsigned Total = CoverageTracker.countBodyRecords(Samples);
+ unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+ if (Coverage < SampleProfileRecordCoverage) {
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+ Twine(Used) + " of " + Twine(Total) + " available profile records (" +
+ Twine(Coverage) + "%) were applied",
+ DS_Warning));
+ }
+ }
+
+ if (SampleProfileSampleCoverage) {
+ uint64_t Used = CoverageTracker.getTotalUsedSamples();
+ uint64_t Total = CoverageTracker.countBodySamples(Samples);
+ unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+ if (Coverage < SampleProfileSampleCoverage) {
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+ Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
+ Twine(Coverage) + "%) were applied",
+ DS_Warning));
+ }
+ }
+ return Changed;
+}
+
+char SampleProfileLoader::ID = 0; // Pass identification, replacement for typeid.
+INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
+ "Sample Profile loader", false, false)
+INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
+INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
+ "Sample Profile loader", false, false)
+
+bool SampleProfileLoader::doInitialization(Module &M) {
+ auto &Ctx = M.getContext();
+ auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false; // ProfileIsValid is not assigned on this path.
+ }
+ Reader = std::move(ReaderOrErr.get());
+ ProfileIsValid = (Reader->read() == sampleprof_error::success); // runOnModule bails out when this is false.
+ return true;
+}
+
+ModulePass *llvm::createSampleProfileLoaderPass() {
+ return new SampleProfileLoader(SampleProfileFile); // SampleProfileFile is defined outside this view; presumably the profile path - confirm.
+}
+
+ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+ return new SampleProfileLoader(Name); // Name is presumably the profile file to load - confirm against the constructor.
+}
+
+bool SampleProfileLoader::runOnModule(Module &M) {
+ if (!ProfileIsValid)
+ return false;
+
+ // Add every profile's total samples to TotalCollectedSamples (not reset here).
+ for (const auto &I : Reader->getProfiles())
+ TotalCollectedSamples += I.second.getTotalSamples();
+
+ bool retval = false;
+ for (auto &F : M)
+ if (!F.isDeclaration()) { // Only functions with a body are processed.
+ clearFunctionData();
+ retval |= runOnFunction(F);
+ }
+ return retval;
+}
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
+ Samples = Reader->getSamplesFor(F);
+ if (!Samples->empty()) // Annotate only when the profile has samples for F.
+ return emitAnnotations(F);
+ return false;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 956991a..c94cc7c 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -7,47 +7,31 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass loops over all of the functions in the input module, looking for
+// This pass loops over all of the functions in the input module, looking for
// dead declarations and removes them. Dead declarations are declarations of
// functions for which no implementation is available (i.e., declarations for
// unused library functions).
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+
using namespace llvm;
#define DEBUG_TYPE "strip-dead-prototypes"
STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
-namespace {
-
-/// @brief Pass to remove unused function declarations.
-class StripDeadPrototypesPass : public ModulePass {
-public:
- static char ID; // Pass identification, replacement for typeid
- StripDeadPrototypesPass() : ModulePass(ID) {
- initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnModule(Module &M) override;
-};
-
-} // end anonymous namespace
-
-char StripDeadPrototypesPass::ID = 0;
-INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
- "Strip Unused Function Prototypes", false, false)
-
-bool StripDeadPrototypesPass::runOnModule(Module &M) {
+static bool stripDeadPrototypes(Module &M) {
bool MadeChange = false;
-
+
// Erase dead function prototypes.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- Function *F = I++;
+ Function *F = &*I++;
// Function must be a prototype and unused.
if (F->isDeclaration() && F->use_empty()) {
F->eraseFromParent();
@@ -59,16 +43,42 @@ bool StripDeadPrototypesPass::runOnModule(Module &M) {
// Erase dead global var prototypes.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ) {
- GlobalVariable *GV = I++;
+ GlobalVariable *GV = &*I++;
// Global must be a prototype and unused.
if (GV->isDeclaration() && GV->use_empty())
GV->eraseFromParent();
}
-
+
// Return an indication of whether we changed anything or not.
return MadeChange;
}
+PreservedAnalyses StripDeadPrototypesPass::run(Module &M) {
+ if (stripDeadPrototypes(M))
+ return PreservedAnalyses::none(); // The helper reported a change.
+ return PreservedAnalyses::all(); // The helper reported no change.
+}
+
+namespace {
+
+class StripDeadPrototypesLegacyPass : public ModulePass { // ModulePass wrapper that forwards to stripDeadPrototypes().
+public:
+ static char ID; // Pass identification, replacement for typeid
+ StripDeadPrototypesLegacyPass() : ModulePass(ID) {
+ initializeStripDeadPrototypesLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override {
+ return stripDeadPrototypes(M); // Returns the helper's "made a change" flag.
+ }
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesLegacyPass::ID = 0;
+INITIALIZE_PASS(StripDeadPrototypesLegacyPass, "strip-dead-prototypes",
+ "Strip Unused Function Prototypes", false, false)
+
ModulePass *llvm::createStripDeadPrototypesPass() {
- return new StripDeadPrototypesPass();
+ return new StripDeadPrototypesLegacyPass();
}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index a4f30c5..46f352f 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -211,13 +211,13 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
- if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
}
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
@@ -305,6 +305,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
SmallVector<Metadata *, 64> LiveSubprograms;
DenseSet<const MDNode *> VisitedSet;
+ std::set<DISubprogram *> LiveSPs;
+ for (Function &F : M) {
+ if (DISubprogram *SP = F.getSubprogram())
+ LiveSPs.insert(SP);
+ }
+
for (DICompileUnit *DIC : F.compile_units()) {
// Create our live subprogram list.
bool SubprogramChange = false;
@@ -314,7 +320,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
continue;
// If the function referenced by DISP is not null, the function is live.
- if (DISP->getFunction())
+ if (LiveSPs.count(DISP))
LiveSubprograms.push_back(DISP);
else
SubprogramChange = true;
OpenPOWER on IntegriCloud