summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp342
1 files changed, 218 insertions, 124 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index d021bce..f0a9f2b 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -17,17 +17,18 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -51,8 +52,8 @@ namespace {
public:
InvokeInliningInfo(InvokeInst *II)
- : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
- CallerLPad(0), InnerEHValuesPHI(0) {
+ : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr),
+ CallerLPad(nullptr), InnerEHValuesPHI(nullptr) {
// If there are PHI nodes in the unwind destination block, we need to keep
// track of which values came into them from the invoke before removing
// the edge from this block.
@@ -144,7 +145,6 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
void InvokeInliningInfo::forwardResume(ResumeInst *RI,
SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
BasicBlock *Dest = getInnerResumeDest();
- LandingPadInst *OuterLPad = getLandingPadInst();
BasicBlock *Src = RI->getParent();
BranchInst::Create(Dest, Src);
@@ -155,16 +155,6 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
RI->eraseFromParent();
-
- // Append the clauses from the outer landing pad instruction into the inlined
- // landing pad instructions.
- for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
- E = InlinedLPads.end(); I != E; ++I) {
- LandingPadInst *InlinedLPad = *I;
- for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
- OuterIdx != OuterNum; ++OuterIdx)
- InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
- }
}
/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -172,22 +162,11 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
/// invokes. This function analyze BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
-///
-/// Returns true to indicate that the next block should be skipped.
-static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
InvokeInliningInfo &Invoke) {
- LandingPadInst *LPI = Invoke.getLandingPadInst();
-
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *I = BBI++;
- if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
- unsigned NumClauses = LPI->getNumClauses();
- L->reserveClauses(NumClauses);
- for (unsigned i = 0; i != NumClauses; ++i)
- L->addClause(LPI->getClause(i));
- }
-
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
@@ -210,6 +189,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split,
Invoke.getOuterResumeDest(),
InvokeArgs, CI->getName(), BB);
+ II->setDebugLoc(CI->getDebugLoc());
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
@@ -223,10 +203,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
// Update any PHI nodes in the exceptional block to indicate that there is
// now a new entry in them.
Invoke.addIncomingPHIValuesFor(BB);
- return false;
+ return;
}
-
- return false;
}
/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
@@ -252,13 +230,23 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
InlinedLPads.insert(II->getLandingPadInst());
+ // Append the clauses from the outer landing pad instruction into the inlined
+ // landing pad instructions.
+ LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
+ for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+ E = InlinedLPads.end(); I != E; ++I) {
+ LandingPadInst *InlinedLPad = *I;
+ unsigned OuterNum = OuterLPad->getNumClauses();
+ InlinedLPad->reserveClauses(OuterNum);
+ for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ if (OuterLPad->isCleanup())
+ InlinedLPad->setCleanup(true);
+ }
+
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
if (InlinedCodeInfo.ContainsCalls)
- if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
- // Honor a request to skip the next block.
- ++BB;
- continue;
- }
+ HandleCallsInBlockInlinedThroughInvoke(BB, Invoke);
// Forward any resumes that are remaining here.
if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
@@ -303,13 +291,13 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
// Only copy the edge if the call was inlined!
- if (VMI == VMap.end() || VMI->second == 0)
+ if (VMI == VMap.end() || VMI->second == nullptr)
continue;
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
- if (NewCall == 0) continue;
+ if (!NewCall) continue;
// Remember that this call site got inlined for the client of
// InlineFunction.
@@ -320,7 +308,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// happens, set the callee of the new call site to a more precise
// destination. This can also happen if the call graph node of the caller
// was just unnecessarily imprecise.
- if (I->second->getFunction() == 0)
+ if (!I->second->getFunction())
if (Function *F = CallSite(NewCall).getCalledFunction()) {
// Indirect call site resolved to direct call.
CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
@@ -336,13 +324,44 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallerNode->removeCallEdgeFor(CS);
}
+static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
+ BasicBlock *InsertBlock,
+ InlineFunctionInfo &IFI) {
+ LLVMContext &Context = Src->getContext();
+ Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+ Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
+ Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) };
+ Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+ IRBuilder<> builder(InsertBlock->begin());
+ Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp");
+ Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp");
+
+ Value *Size;
+ if (IFI.DL == nullptr)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.DL->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DstCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ builder.CreateCall(MemCpyFn, CallArgs);
+}
+
/// HandleByValArgument - When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
const Function *CalledFunc,
InlineFunctionInfo &IFI,
unsigned ByValAlignment) {
- Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+ PointerType *ArgTy = cast<PointerType>(Arg->getType());
+ Type *AggTy = ArgTy->getElementType();
// If the called function is readonly, then it could not mutate the caller's
// copy of the byval'd memory. In this case, it is safe to elide the copy and
@@ -357,21 +376,17 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
- IFI.TD) >= ByValAlignment)
+ IFI.DL) >= ByValAlignment)
return Arg;
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
// for code quality, but rarely happens and is required for correctness.
}
-
- LLVMContext &Context = Arg->getContext();
- Type *VoidPtrTy = Type::getInt8PtrTy(Context);
-
// Create the alloca. If we have DataLayout, use nice alignment.
unsigned Align = 1;
- if (IFI.TD)
- Align = IFI.TD->getPrefTypeAlignment(AggTy);
+ if (IFI.DL)
+ Align = IFI.DL->getPrefTypeAlignment(AggTy);
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
@@ -380,32 +395,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Function *Caller = TheCall->getParent()->getParent();
- Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+ Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(),
&*Caller->begin()->begin());
- // Emit a memcpy.
- Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
- Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
- Intrinsic::memcpy,
- Tys);
- Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
- Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
-
- Value *Size;
- if (IFI.TD == 0)
- Size = ConstantExpr::getSizeOf(AggTy);
- else
- Size = ConstantInt::get(Type::getInt64Ty(Context),
- IFI.TD->getTypeStoreSize(AggTy));
-
- // Always generate a memcpy of alignment 1 here because we don't know
- // the alignment of the src pointer. Other optimizations can infer
- // better alignment.
- Value *CallArgs[] = {
- DestCast, SrcCast, Size,
- ConstantInt::get(Type::getInt32Ty(Context), 1),
- ConstantInt::getFalse(Context) // isVolatile
- };
- IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
// Uses of the argument in the function should use our new alloca
// instead.
@@ -415,9 +407,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
// intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
- for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
- ++UI) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) {
+ for (User *U : V->users()) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::lifetime_start:
@@ -432,16 +423,17 @@ static bool isUsedByLifetimeMarker(Value *V) {
// hasLifetimeMarkers - Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
- Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
- if (AI->getType() == Int8PtrTy)
+ Type *Ty = AI->getType();
+ Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
+ Ty->getPointerAddressSpace());
+ if (Ty == Int8PtrTy)
return isUsedByLifetimeMarker(AI);
// Do a scan to find all the casts to i8*.
- for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
- ++I) {
- if (I->getType() != Int8PtrTy) continue;
- if (I->stripPointerCasts() != AI) continue;
- if (isUsedByLifetimeMarker(*I))
+ for (User *U : AI->users()) {
+ if (U->getType() != Int8PtrTy) continue;
+ if (U->stripPointerCasts() != AI) continue;
+ if (isUsedByLifetimeMarker(U))
return true;
}
return false;
@@ -475,7 +467,13 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
DebugLoc DL = BI->getDebugLoc();
- if (!DL.isUnknown()) {
+ if (DL.isUnknown()) {
+ // If the inlined instruction has no line number, make it look as if it
+ // originates from the call location. This is important for
+ // ((__always_inline__, __nodebug__)) functions which must use caller
+ // location for all instructions in their function body.
+ BI->setDebugLoc(TheCallDL);
+ } else {
BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
LLVMContext &Ctx = BI->getContext();
@@ -488,6 +486,33 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
+/// Returns a musttail call instruction if one immediately precedes the given
+/// return instruction with an optional bitcast instruction between them.
+static CallInst *getPrecedingMustTailCall(ReturnInst *RI) {
+ Instruction *Prev = RI->getPrevNode();
+ if (!Prev)
+ return nullptr;
+
+ if (Value *RV = RI->getReturnValue()) {
+ if (RV != Prev)
+ return nullptr;
+
+ // Look through the optional bitcast.
+ if (auto *BI = dyn_cast<BitCastInst>(Prev)) {
+ RV = BI->getOperand(0);
+ Prev = BI->getPrevNode();
+ if (!Prev || RV != Prev)
+ return nullptr;
+ }
+ }
+
+ if (auto *CI = dyn_cast<CallInst>(Prev)) {
+ if (CI->isMustTailCall())
+ return CI;
+ }
+ return nullptr;
+}
+
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller. This returns false if it is not possible to inline
/// this call. The program is still in a well defined state if this occurs
@@ -507,15 +532,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
IFI.reset();
const Function *CalledFunc = CS.getCalledFunction();
- if (CalledFunc == 0 || // Can't inline external function or indirect
+ if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
- // If the call to the callee is not a tail call, we must clear the 'tail'
- // flags on any calls that we inline.
- bool MustClearTailCallFlags =
- !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
-
// If the call to the callee cannot throw, set the 'nounwind' flag on any
// calls that we inline.
bool MarkNoUnwind = CS.doesNotThrow();
@@ -535,7 +555,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
// Get the personality function from the callee if it contains a landing pad.
- Value *CalleePersonality = 0;
+ Value *CalleePersonality = nullptr;
for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
I != E; ++I)
if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
@@ -578,6 +598,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
{ // Scope to destroy VMap after cloning.
ValueToValueMapTy VMap;
+ // Keep a list of pair (dst, src) to emit byval initializations.
+ SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -597,11 +619,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (CS.isByValArgument(ArgNo)) {
ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
CalledFunc->getParamAlignment(ArgNo+1));
-
- // Calls that we inline may use the new alloca, so we need to clear
- // their 'tail' flags if HandleByValArgument introduced a new alloca and
- // the callee has calls.
- MustClearTailCallFlags |= ActualArg != *AI;
+ if (ActualArg != *AI)
+ ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
}
VMap[I] = ActualArg;
@@ -613,11 +632,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
- &InlinedFunctionInfo, IFI.TD, TheCall);
+ &InlinedFunctionInfo, IFI.DL, TheCall);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
+ // Inject byval arguments initialization.
+ for (std::pair<Value*, Value*> &Init : ByValInit)
+ HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
+ FirstNewBlock, IFI);
+
// Update the callgraph if requested.
if (IFI.CG)
UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
@@ -635,7 +659,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
for (BasicBlock::iterator I = FirstNewBlock->begin(),
E = FirstNewBlock->end(); I != E; ) {
AllocaInst *AI = dyn_cast<AllocaInst>(I++);
- if (AI == 0) continue;
+ if (!AI) continue;
// If the alloca is now dead, remove it. This often occurs due to code
// specialization.
@@ -667,6 +691,45 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
+ bool InlinedMustTailCalls = false;
+ if (InlinedFunctionInfo.ContainsCalls) {
+ CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
+ if (CallInst *CI = dyn_cast<CallInst>(TheCall))
+ CallSiteTailKind = CI->getTailCallKind();
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
+ ++BB) {
+ for (Instruction &I : *BB) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI)
+ continue;
+
+ // We need to reduce the strength of any inlined tail calls. For
+ // musttail, we have to avoid introducing potential unbounded stack
+ // growth. For example, if functions 'f' and 'g' are mutually recursive
+ // with musttail, we can inline 'g' into 'f' so long as we preserve
+ // musttail on the cloned call to 'f'. If either the inlined call site
+ // or the cloned call site is *not* musttail, the program already has
+ // one frame of stack growth, so it's safe to remove musttail. Here is
+ // a table of example transformations:
+ //
+ // f -> musttail g -> musttail f ==> f -> musttail f
+ // f -> musttail g -> tail f ==> f -> tail f
+ // f -> g -> musttail f ==> f -> f
+ // f -> g -> tail f ==> f -> f
+ CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
+ ChildTCK = std::min(CallSiteTailKind, ChildTCK);
+ CI->setTailCallKind(ChildTCK);
+ InlinedMustTailCalls |= CI->isMustTailCall();
+
+ // Calls inlined through a 'nounwind' call site should be marked
+ // 'nounwind'.
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+ }
+
// Leave lifetime markers for the static alloca's, scoping them to the
// function we just inlined.
if (InsertLifetime && !IFI.StaticAllocas.empty()) {
@@ -680,12 +743,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
continue;
// Try to determine the size of the allocation.
- ConstantInt *AllocaSize = 0;
+ ConstantInt *AllocaSize = nullptr;
if (ConstantInt *AIArraySize =
dyn_cast<ConstantInt>(AI->getArraySize())) {
- if (IFI.TD) {
+ if (IFI.DL) {
Type *AllocaType = AI->getAllocatedType();
- uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
+ uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType);
uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
// Check that array size doesn't saturate uint64_t and doesn't
@@ -699,9 +762,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
builder.CreateLifetimeStart(AI, AllocaSize);
- for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
- IRBuilder<> builder(Returns[ri]);
- builder.CreateLifetimeEnd(AI, AllocaSize);
+ for (ReturnInst *RI : Returns) {
+ // Don't insert llvm.lifetime.end calls between a musttail call and a
+ // return. The return kills all local allocas.
+ if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
+ continue;
+ IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
}
}
}
@@ -720,33 +786,56 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
- for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
- IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
+ for (ReturnInst *RI : Returns) {
+ // Don't insert llvm.stackrestore calls between a musttail call and a
+ // return. The return will restore the stack pointer.
+ if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
+ continue;
+ IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
}
}
- // If we are inlining tail call instruction through a call site that isn't
- // marked 'tail', we must remove the tail marker for any calls in the inlined
- // code. Also, calls inlined through a 'nounwind' call site should be marked
- // 'nounwind'.
- if (InlinedFunctionInfo.ContainsCalls &&
- (MustClearTailCallFlags || MarkNoUnwind)) {
- for (Function::iterator BB = FirstNewBlock, E = Caller->end();
- BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (MustClearTailCallFlags)
- CI->setTailCall(false);
- if (MarkNoUnwind)
- CI->setDoesNotThrow();
- }
- }
-
// If we are inlining for an invoke instruction, we must make sure to rewrite
// any call instructions into invoke instructions.
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+ // Handle any inlined musttail call sites. In order for a new call site to be
+ // musttail, the source of the clone and the inlined call site must have been
+ // musttail. Therefore it's safe to return without merging control into the
+ // phi below.
+ if (InlinedMustTailCalls) {
+ // Check if we need to bitcast the result of any musttail calls.
+ Type *NewRetTy = Caller->getReturnType();
+ bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy;
+
+ // Handle the returns preceded by musttail calls separately.
+ SmallVector<ReturnInst *, 8> NormalReturns;
+ for (ReturnInst *RI : Returns) {
+ CallInst *ReturnedMustTail = getPrecedingMustTailCall(RI);
+ if (!ReturnedMustTail) {
+ NormalReturns.push_back(RI);
+ continue;
+ }
+ if (!NeedBitCast)
+ continue;
+
+ // Delete the old return and any preceding bitcast.
+ BasicBlock *CurBB = RI->getParent();
+ auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
+ RI->eraseFromParent();
+ if (OldCast)
+ OldCast->eraseFromParent();
+
+ // Insert a new bitcast and return with the right type.
+ IRBuilder<> Builder(CurBB);
+ Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
+ }
+
+ // Leave behind the normal returns so we can merge control flow.
+ std::swap(Returns, NormalReturns);
+ }
+
// If we cloned in _exactly one_ basic block, and if that block ends in a
// return instruction, we splice the body of the inlined callee directly into
// the calling basic block.
@@ -790,7 +879,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// "starter" and "ender" blocks. How we accomplish this depends on whether
// this is an invoke instruction or a call instruction.
BasicBlock *AfterCallBB;
- BranchInst *CreatedBranchToNormalDest = NULL;
+ BranchInst *CreatedBranchToNormalDest = nullptr;
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
// Add an unconditional branch to make this look like the CallInst case...
@@ -829,7 +918,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// any users of the original call/invoke instruction.
Type *RTy = CalledFunc->getReturnType();
- PHINode *PHI = 0;
+ PHINode *PHI = nullptr;
if (Returns.size() > 1) {
// The PHI node should go at the front of the new basic block to merge all
// possible incoming values.
@@ -902,6 +991,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Since we are now done with the Call/Invoke, we can delete it.
TheCall->eraseFromParent();
+ // If we inlined any musttail calls and the original return is now
+ // unreachable, delete it. It can only contain a bitcast and ret.
+ if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB))
+ AfterCallBB->eraseFromParent();
+
// We should always be able to fold the entry block of the function into the
// single predecessor of the block...
assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
@@ -922,7 +1016,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
if (PHI) {
- if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+ if (Value *V = SimplifyInstruction(PHI, IFI.DL)) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
}
OpenPOWER on IntegriCloud