diff options
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/IPO/ArgumentPromotion.cpp | 13 | ||||
-rw-r--r-- | lib/Transforms/IPO/DeadArgumentElimination.cpp | 11 | ||||
-rw-r--r-- | lib/Transforms/IPO/GlobalDCE.cpp | 15 | ||||
-rw-r--r-- | lib/Transforms/IPO/GlobalOpt.cpp | 38 | ||||
-rw-r--r-- | lib/Transforms/IPO/MergeFunctions.cpp | 482 | ||||
-rw-r--r-- | lib/Transforms/IPO/PartialInlining.cpp | 171 | ||||
-rw-r--r-- | lib/Transforms/Scalar/IndVarSimplify.cpp | 95 | ||||
-rw-r--r-- | lib/Transforms/Scalar/InstructionCombining.cpp | 18 | ||||
-rw-r--r-- | lib/Transforms/Scalar/SimplifyCFGPass.cpp | 10 |
9 files changed, 602 insertions, 251 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 2bb6428..a612634 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -127,17 +127,8 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. - for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); - UI != E; ++UI) { - CallSite CS = CallSite::get(*UI); - if (!CS.getInstruction()) // "Taking the address" of the function - return false; - - // Ensure that this call site is CALLING the function, not passing it as - // an argument. - if (!CS.isCallee(UI)) - return false; - } + if (F->hasAddressTaken()) + return false; // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 666db7e..e480dad 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -175,15 +175,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false; // Ensure that the function is only directly called. - for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ++I) { - // If this use is anything other than a call site, give up. - CallSite CS = CallSite::get(*I); - Instruction *TheCall = CS.getInstruction(); - if (!TheCall) return false; // Not a direct call site? - - // The addr of this function is passed to the call. - if (!CS.isCallee(I)) return false; - } + if (Fn.hasAddressTaken()) + return false; // Okay, we know we can transform this function if safe. Scan its body // looking for calls to llvm.vastart. diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index db378b0..9c652b9 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -47,7 +47,6 @@ namespace { void GlobalIsNeeded(GlobalValue *GV); void MarkUsedGlobalsAsNeeded(Constant *C); - bool SafeToDestroyConstant(Constant* C); bool RemoveUnusedGlobalValue(GlobalValue &GV); }; } @@ -211,17 +210,3 @@ bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) { GV.removeDeadConstantUsers(); return GV.use_empty(); } - -// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used -// by constants itself. Note that constants cannot be cyclic, so this test is -// pretty easy to implement recursively. -// -bool GlobalDCE::SafeToDestroyConstant(Constant *C) { - for (Value::use_iterator I = C->use_begin(), E = C->use_end(); I != E; ++I) - if (Constant *User = dyn_cast<Constant>(*I)) { - if (!SafeToDestroyConstant(User)) return false; - } else { - return false; - } - return true; -} diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 5f12825..9a1b294 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -136,16 +136,16 @@ struct VISIBILITY_HIDDEN GlobalStatus { } -/// ConstantIsDead - Return true if the specified constant is (transitively) -/// dead. The constant may be used by other constants (e.g. constant arrays and -/// constant exprs) as long as they are dead, but it cannot be used by anything -/// else. -static bool ConstantIsDead(Constant *C) { +// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used +// by constants itself. Note that constants cannot be cyclic, so this test is +// pretty easy to implement recursively. +// +static bool SafeToDestroyConstant(Constant *C) { if (isa<GlobalValue>(C)) return false; for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) if (Constant *CU = dyn_cast<Constant>(*UI)) { - if (!ConstantIsDead(CU)) return false; + if (!SafeToDestroyConstant(CU)) return false; } else return false; return true; @@ -233,7 +233,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, } else if (Constant *C = dyn_cast<Constant>(*UI)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. - if (!ConstantIsDead(C)) + if (!SafeToDestroyConstant(C)) return true; } else { GS.HasNonInstructionUser = true; @@ -338,7 +338,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { } else if (Constant *C = dyn_cast<Constant>(U)) { // If we have a chain of dead constantexprs or other things dangling from // us, and if they are all dead, nuke them without remorse. - if (ConstantIsDead(C)) { + if (SafeToDestroyConstant(C)) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. CleanupConstantGlobalUsers(V, Init); @@ -354,7 +354,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { static bool isSafeSROAElementUse(Value *V) { // We might have a dead and dangling constant hanging off of here. if (Constant *C = dyn_cast<Constant>(V)) - return ConstantIsDead(C); + return SafeToDestroyConstant(C); Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; @@ -1769,22 +1769,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return false; } -/// OnlyCalledDirectly - Return true if the specified function is only called -/// directly. In other words, its address is never taken. -static bool OnlyCalledDirectly(Function *F) { - for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ - Instruction *User = dyn_cast<Instruction>(*UI); - if (!User) return false; - if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false; - - // See if the function address is passed as an argument. - for (User::op_iterator i = User->op_begin() + 1, e = User->op_end(); - i != e; ++i) - if (*i == F) return false; - } - return true; -} - /// ChangeCalleesToFastCall - Walk all of the direct calls of the specified /// function, changing them to FastCC. static void ChangeCalleesToFastCall(Function *F) { @@ -1830,7 +1814,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { ++NumFnDeleted; } else if (F->hasLocalLinkage()) { if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && - OnlyCalledDirectly(F)) { + !F->hasAddressTaken()) { // If this function has C calling conventions, is not a varargs // function, and is only called directly, promote it to use the Fast // calling convention. @@ -1841,7 +1825,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { } if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && - OnlyCalledDirectly(F)) { + !F->hasAddressTaken()) { // The function is not used by a trampoline intrinsic, so it is safe // to remove the 'nest' attribute. RemoveNestAttribute(F); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 17bc2d4..5693cc0 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -9,10 +9,6 @@ // // This pass looks for equivalent functions that are mergable and folds them. // -// A Function will not be analyzed if: -// * it is overridable at runtime (except for weak linkage), or -// * it is used by anything other than the callee parameter of a call/invoke -// // A hash is computed from the function, based on its type and number of // basic blocks. // @@ -24,8 +20,6 @@ // When a match is found, the functions are folded. We can only fold two // functions when we know that the definition of one of them is not // overridable. -// * fold a function marked internal by replacing all of its users. -// * fold extern or weak functions by replacing them with a global alias // //===----------------------------------------------------------------------===// // @@ -48,6 +42,7 @@ #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" #include "llvm/InlineAsm.h" @@ -62,7 +57,6 @@ using namespace llvm; STATISTIC(NumFunctionsMerged, "Number of functions merged"); -STATISTIC(NumMergeFails, "Number of identical function pairings not merged"); namespace { struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass { @@ -81,16 +75,169 @@ ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } +// ===----------------------------------------------------------------------=== +// Comparison of functions +// ===----------------------------------------------------------------------=== + static unsigned long hash(const Function *F) { - return F->size() ^ reinterpret_cast<unsigned long>(F->getType()); - //return F->size() ^ F->arg_size() ^ F->getReturnType(); + const FunctionType *FTy = F->getFunctionType(); + + FoldingSetNodeID ID; + ID.AddInteger(F->size()); + ID.AddInteger(F->getCallingConv()); + ID.AddBoolean(F->hasGC()); + ID.AddBoolean(FTy->isVarArg()); + ID.AddInteger(FTy->getReturnType()->getTypeID()); + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + ID.AddInteger(FTy->getParamType(i)->getTypeID()); + return ID.ComputeHash(); +} + +/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast found by +/// walking the chain of cast operands. Otherwise, returns the argument. +static Value* IgnoreBitcasts(Value *V) { + while (BitCastInst *BC = dyn_cast<BitCastInst>(V)) + V = BC->getOperand(0); + + return V; +} + +/// isEquivalentType - any two pointers are equivalent. Otherwise, standard +/// type equivalence rules apply. +static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { + if (Ty1 == Ty2) + return true; + if (Ty1->getTypeID() != Ty2->getTypeID()) + return false; + + switch(Ty1->getTypeID()) { + case Type::VoidTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + case Type::MetadataTyID: + return true; + + case Type::IntegerTyID: + case Type::OpaqueTyID: + // Ty1 == Ty2 would have returned true earlier. + return false; + + default: + assert(0 && "Unknown type!"); + return false; + + case Type::PointerTyID: { + const PointerType *PTy1 = cast<PointerType>(Ty1); + const PointerType *PTy2 = cast<PointerType>(Ty2); + return PTy1->getAddressSpace() == PTy2->getAddressSpace(); + } + + case Type::StructTyID: { + const StructType *STy1 = cast<StructType>(Ty1); + const StructType *STy2 = cast<StructType>(Ty2); + if (STy1->getNumElements() != STy2->getNumElements()) + return false; + + if (STy1->isPacked() != STy2->isPacked()) + return false; + + for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) { + if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i))) + return false; + } + return true; + } + + case Type::FunctionTyID: { + const FunctionType *FTy1 = cast<FunctionType>(Ty1); + const FunctionType *FTy2 = cast<FunctionType>(Ty2); + if (FTy1->getNumParams() != FTy2->getNumParams() || + FTy1->isVarArg() != FTy2->isVarArg()) + return false; + + if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType())) + return false; + + for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) { + if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i))) + return false; + } + return true; + } + + case Type::ArrayTyID: + case Type::VectorTyID: { + const SequentialType *STy1 = cast<SequentialType>(Ty1); + const SequentialType *STy2 = cast<SequentialType>(Ty2); + return isEquivalentType(STy1->getElementType(), STy2->getElementType()); + } + } +} + +/// isEquivalentOperation - determine whether the two operations are the same +/// except that pointer-to-A and pointer-to-B are equivalent. This should be +/// kept in sync with Instruction::isSameOperationAs. +static bool +isEquivalentOperation(const Instruction *I1, const Instruction *I2) { + if (I1->getOpcode() != I2->getOpcode() || + I1->getNumOperands() != I2->getNumOperands() || + !isEquivalentType(I1->getType(), I2->getType())) + return false; + + // We have two instructions of identical opcode and #operands. Check to see + // if all operands are the same type + for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i) + if (!isEquivalentType(I1->getOperand(i)->getType(), + I2->getOperand(i)->getType())) + return false; + + // Check special state that is a part of some instructions. + if (const LoadInst *LI = dyn_cast<LoadInst>(I1)) + return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() && + LI->getAlignment() == cast<LoadInst>(I2)->getAlignment(); + if (const StoreInst *SI = dyn_cast<StoreInst>(I1)) + return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() && + SI->getAlignment() == cast<StoreInst>(I2)->getAlignment(); + if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) + return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); + if (const CallInst *CI = dyn_cast<CallInst>(I1)) + return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() && + CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && + CI->getAttributes().getRawPointer() == + cast<CallInst>(I2)->getAttributes().getRawPointer(); + if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) + return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && + CI->getAttributes().getRawPointer() == + cast<InvokeInst>(I2)->getAttributes().getRawPointer(); + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) { + if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices()) + return false; + for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i) + if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i]) + return false; + return true; + } + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) { + if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices()) + return false; + for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i) + if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i]) + return false; + return true; + } + + return true; } static bool compare(const Value *V, const Value *U) { assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) && "Must not compare basic blocks."); - assert(V->getType() == U->getType() && + assert(isEquivalentType(V->getType(), U->getType()) && "Two of the same operation have operands of different type."); // TODO: If the constant is an expression of F, we should accept that it's @@ -117,20 +264,40 @@ static bool compare(const Value *V, const Value *U) { static bool equals(const BasicBlock *BB1, const BasicBlock *BB2, DenseMap<const Value *, const Value *> &ValueMap, DenseMap<const Value *, const Value *> &SpeculationMap) { - // Specutively add it anyways. If it's false, we'll notice a difference later, and - // this won't matter. + // Speculatively add it anyways. If it's false, we'll notice a difference + // later, and this won't matter. ValueMap[BB1] = BB2; BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); do { - if (!FI->isSameOperationAs(const_cast<Instruction *>(&*GI))) - return false; + if (isa<BitCastInst>(FI)) { + ++FI; + continue; + } + if (isa<BitCastInst>(GI)) { + ++GI; + continue; + } - if (FI->getNumOperands() != GI->getNumOperands()) + if (!isEquivalentOperation(FI, GI)) return false; + if (isa<GetElementPtrInst>(FI)) { + const GetElementPtrInst *GEPF = cast<GetElementPtrInst>(FI); + const GetElementPtrInst *GEPG = cast<GetElementPtrInst>(GI); + if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) { + // It's effectively a bitcast. + ++FI, ++GI; + continue; + } + + // TODO: we only really care about the elements before the index + if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType()) + return false; + } + if (ValueMap[FI] == GI) { ++FI, ++GI; continue; @@ -140,8 +307,8 @@ static bool equals(const BasicBlock *BB1, const BasicBlock *BB2, return false; for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { - Value *OpF = FI->getOperand(i); - Value *OpG = GI->getOperand(i); + Value *OpF = IgnoreBitcasts(FI->getOperand(i)); + Value *OpG = IgnoreBitcasts(GI->getOperand(i)); if (ValueMap[OpF] == OpG) continue; @@ -149,10 +316,8 @@ static bool equals(const BasicBlock *BB1, const BasicBlock *BB2, if (ValueMap[OpF] != NULL) return false; - assert(OpF->getType() == OpG->getType() && - "Two of the same operation has operands of different type."); - - if (OpF->getValueID() != OpG->getValueID()) + if (OpF->getValueID() != OpG->getValueID() || + !isEquivalentType(OpF->getType(), OpG->getType())) return false; if (isa<PHINode>(FI)) { @@ -203,14 +368,15 @@ static bool equals(const Function *F, const Function *G) { if (F->hasSection() && F->getSection() != G->getSection()) return false; + if (F->isVarArg() != G->isVarArg()) + return false; + // TODO: if it's internal and only used in direct calls, we could handle this // case too. if (F->getCallingConv() != G->getCallingConv()) return false; - // TODO: We want to permit cases where two functions take T* and S* but - // only load or store them into T** and S**. - if (F->getType() != G->getType()) + if (!isEquivalentType(F->getFunctionType(), G->getFunctionType())) return false; DenseMap<const Value *, const Value *> ValueMap; @@ -237,89 +403,213 @@ static bool equals(const Function *F, const Function *G) { return true; } -static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { - if (FnVec[i]->mayBeOverridden() && !FnVec[j]->mayBeOverridden()) - std::swap(FnVec[i], FnVec[j]); - - Function *F = FnVec[i]; - Function *G = FnVec[j]; +// ===----------------------------------------------------------------------=== +// Folding of functions +// ===----------------------------------------------------------------------=== + +// Cases: +// * F is external strong, G is external strong: +// turn G into a thunk to F (1) +// * F is external strong, G is external weak: +// turn G into a thunk to F (1) +// * F is external weak, G is external weak: +// unfoldable +// * F is external strong, G is internal: +// address of G taken: +// turn G into a thunk to F (1) +// address of G not taken: +// make G an alias to F (2) +// * F is internal, G is external weak +// address of F is taken: +// turn G into a thunk to F (1) +// address of F is not taken: +// make G an alias of F (2) +// * F is internal, G is internal: +// address of F and G are taken: +// turn G into a thunk to F (1) +// address of G is not taken: +// make G an alias to F (2) +// +// alias requires linkage == (external,local,weak) fallback to creating a thunk +// external means 'externally visible' linkage != (internal,private) +// internal means linkage == (internal,private) +// weak means linkage mayBeOverridable +// being external implies that the address is taken +// +// 1. turn G into a thunk to F +// 2. make G an alias to F + +enum LinkageCategory { + ExternalStrong, + ExternalWeak, + Internal +}; + +static LinkageCategory categorize(const Function *F) { + switch (F->getLinkage()) { + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + return Internal; + + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::ExternalWeakLinkage: + return ExternalWeak; + + case GlobalValue::ExternalLinkage: + case GlobalValue::AvailableExternallyLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::AppendingLinkage: + case GlobalValue::DLLImportLinkage: + case GlobalValue::DLLExportLinkage: + case GlobalValue::GhostLinkage: + case GlobalValue::CommonLinkage: + return ExternalStrong; + } - if (!F->mayBeOverridden()) { - if (G->hasLocalLinkage()) { - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - G->replaceAllUsesWith(F); - G->eraseFromParent(); - ++NumFunctionsMerged; - return true; - } + assert(0 && "Unknown LinkageType."); + return ExternalWeak; +} - if (G->hasExternalLinkage() || G->hasWeakLinkage()) { - GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "", - F, G->getParent()); - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - GA->takeName(G); - GA->setVisibility(G->getVisibility()); - G->replaceAllUsesWith(GA); - G->eraseFromParent(); - ++NumFunctionsMerged; - return true; +static void ThunkGToF(Function *F, Function *G) { + Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", + G->getParent()); + BasicBlock *BB = BasicBlock::Create("", NewG); + + std::vector<Value *> Args; + unsigned i = 0; + const FunctionType *FFTy = F->getFunctionType(); + for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); + AI != AE; ++AI) { + if (FFTy->getParamType(i) == AI->getType()) + Args.push_back(AI); + else { + Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB); + Args.push_back(BCI); } + ++i; } - if (F->hasWeakLinkage() && G->hasWeakLinkage()) { - GlobalAlias *GA_F = new GlobalAlias(F->getType(), F->getLinkage(), "", - 0, F->getParent()); - GA_F->takeName(F); - GA_F->setVisibility(F->getVisibility()); - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - F->replaceAllUsesWith(GA_F); - F->setName("folded." + GA_F->getName()); - F->setLinkage(GlobalValue::ExternalLinkage); - GA_F->setAliasee(F); - - GlobalAlias *GA_G = new GlobalAlias(G->getType(), G->getLinkage(), "", - F, G->getParent()); - GA_G->takeName(G); - GA_G->setVisibility(G->getVisibility()); - G->replaceAllUsesWith(GA_G); - G->eraseFromParent(); - - ++NumFunctionsMerged; - return true; + CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB); + CI->setTailCall(); + CI->setCallingConv(F->getCallingConv()); + if (NewG->getReturnType() == Type::VoidTy) { + ReturnInst::Create(BB); + } else if (CI->getType() != NewG->getReturnType()) { + Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB); + ReturnInst::Create(BCI, BB); + } else { + ReturnInst::Create(CI, BB); } - DOUT << "Failed on " << F->getName() << " and " << G->getName() << "\n"; + NewG->copyAttributesFrom(G); + NewG->takeName(G); + G->replaceAllUsesWith(NewG); + G->eraseFromParent(); - ++NumMergeFails; - return false; + // TODO: look at direct callers to G and make them all direct callers to F. } -static bool hasAddressTaken(User *U) { - for (User::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) { - User *Use = *I; +static void AliasGToF(Function *F, Function *G) { + if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage()) + return ThunkGToF(F, G); + + GlobalAlias *GA = new GlobalAlias( + G->getType(), G->getLinkage(), "", + ConstantExpr::getBitCast(F, G->getType()), G->getParent()); + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + GA->takeName(G); + GA->setVisibility(G->getVisibility()); + G->replaceAllUsesWith(GA); + G->eraseFromParent(); +} - // 'call (bitcast @F to ...)' happens a lot. - while (isa<ConstantExpr>(Use) && Use->hasOneUse()) { - Use = *Use->use_begin(); - } +static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { + Function *F = FnVec[i]; + Function *G = FnVec[j]; - if (isa<ConstantExpr>(Use)) { - if (hasAddressTaken(Use)) - return true; - } + LinkageCategory catF = categorize(F); + LinkageCategory catG = categorize(G); - if (!isa<CallInst>(Use) && !isa<InvokeInst>(Use)) - return true; + if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) { + std::swap(FnVec[i], FnVec[j]); + std::swap(F, G); + std::swap(catF, catG); + } - // Make sure we aren't passing U as a parameter to call instead of the - // callee. - if (CallSite(cast<Instruction>(Use)).hasArgument(U)) - return true; + switch (catF) { + case ExternalStrong: + switch (catG) { + case ExternalStrong: + case ExternalWeak: + ThunkGToF(F, G); + break; + case Internal: + if (G->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + } + break; + + case ExternalWeak: { + assert(catG == ExternalWeak); + + // Make them both thunks to the same internal function. + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + F->replaceAllUsesWith(H); + + ThunkGToF(F, G); + ThunkGToF(F, H); + + F->setLinkage(GlobalValue::InternalLinkage); + } break; + + case Internal: + switch (catG) { + case ExternalStrong: + assert(0); + // fall-through + case ExternalWeak: + if (F->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + case Internal: { + bool addrTakenF = F->hasAddressTaken(); + bool addrTakenG = G->hasAddressTaken(); + if (!addrTakenF && addrTakenG) { + std::swap(FnVec[i], FnVec[j]); + std::swap(F, G); + std::swap(addrTakenF, addrTakenG); + } + + if (addrTakenF && addrTakenG) { + ThunkGToF(F, G); + } else { + assert(!addrTakenG); + AliasGToF(F, G); + } + } break; + } + break; } - return false; + ++NumFunctionsMerged; + return true; } +// ===----------------------------------------------------------------------=== +// Pass definition +// ===----------------------------------------------------------------------=== + bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; @@ -329,25 +619,19 @@ bool MergeFunctions::runOnModule(Module &M) { if (F->isDeclaration() || F->isIntrinsic()) continue; - if (!F->hasLocalLinkage() && !F->hasExternalLinkage() && - !F->hasWeakLinkage()) - continue; - - if (hasAddressTaken(F)) - continue; - FnMap[hash(F)].push_back(F); } - // TODO: instead of running in a loop, we could also fold functions in callgraph - // order. Constructing the CFG probably isn't cheaper than just running in a loop. + // TODO: instead of running in a loop, we could also fold functions in + // callgraph order. Constructing the CFG probably isn't cheaper than just + // running in a loop, unless it happened to already be available. bool LocalChanged; do { LocalChanged = false; + DOUT << "size: " << FnMap.size() << "\n"; for (std::map<unsigned long, std::vector<Function *> >::iterator I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { - DOUT << "size: " << FnMap.size() << "\n"; std::vector<Function *> &FnVec = I->second; DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n"; diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp new file mode 100644 index 0000000..b3a25540 --- /dev/null +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -0,0 +1,171 @@ +//===- PartialInlining.cpp - Inline parts of functions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs partial inlining, typically by inlining an if statement +// that surrounds the body of the function. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "partialinlining" +#include "llvm/Transforms/IPO.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CFG.h" +using namespace llvm; + +namespace { + struct VISIBILITY_HIDDEN PartialInliner : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + static char ID; // Pass identification, replacement for typeid + PartialInliner() : ModulePass(&ID) {} + + bool runOnModule(Module& M); + + private: + Function* unswitchFunction(Function* F); + }; +} + +char PartialInliner::ID = 0; +static RegisterPass<PartialInliner> X("partial-inliner", "Partial Inliner"); + +ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } + +Function* PartialInliner::unswitchFunction(Function* F) { + // First, verify that this function is an unswitching candidate... + BasicBlock* entryBlock = F->begin(); + if (!isa<BranchInst>(entryBlock->getTerminator())) + return 0; + + BasicBlock* returnBlock = 0; + BasicBlock* nonReturnBlock = 0; + unsigned returnCount = 0; + for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock); + SI != SE; ++SI) + if (isa<ReturnInst>((*SI)->getTerminator())) { + returnBlock = *SI; + returnCount++; + } else + nonReturnBlock = *SI; + + if (returnCount != 1) + return 0; + + // Clone the function, so that we can hack away on it. + DenseMap<const Value*, Value*> ValueMap; + Function* duplicateFunction = CloneFunction(F, ValueMap); + duplicateFunction->setLinkage(GlobalValue::InternalLinkage); + F->getParent()->getFunctionList().push_back(duplicateFunction); + BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]); + BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]); + BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]); + + // Go ahead and update all uses to the duplicate, so that we can just + // use the inliner functionality when we're done hacking. + F->replaceAllUsesWith(duplicateFunction); + + // Special hackery is needed with PHI nodes that have inputs from more than + // one extracted block. For simplicity, just split the PHIs into a two-level + // sequence of PHIs, some of which will go in the extracted region, and some + // of which will go outside. + BasicBlock* preReturn = newReturnBlock; + newReturnBlock = newReturnBlock->splitBasicBlock( + newReturnBlock->getFirstNonPHI()); + BasicBlock::iterator I = preReturn->begin(); + BasicBlock::iterator Ins = newReturnBlock->begin(); + while (I != preReturn->end()) { + PHINode* OldPhi = dyn_cast<PHINode>(I); + if (!OldPhi) break; + + PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins); + OldPhi->replaceAllUsesWith(retPhi); + Ins = newReturnBlock->getFirstNonPHI(); + + retPhi->addIncoming(I, preReturn); + retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock), + newEntryBlock); + OldPhi->removeIncomingValue(newEntryBlock); + + ++I; + } + newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock); + + // Gather up the blocks that we're going to extract. + std::vector<BasicBlock*> toExtract; + toExtract.push_back(newNonReturnBlock); + for (Function::iterator FI = duplicateFunction->begin(), + FE = duplicateFunction->end(); FI != FE; ++FI) + if (&*FI != newEntryBlock && &*FI != newReturnBlock && + &*FI != newNonReturnBlock) + toExtract.push_back(FI); + + // The CodeExtractor needs a dominator tree. + DominatorTree DT; + DT.runOnFunction(*duplicateFunction); + + // Extract the body of the the if. + Function* extractedFunction = ExtractCodeRegion(DT, toExtract); + + // Inline the top-level if test into all callers. + std::vector<User*> Users(duplicateFunction->use_begin(), + duplicateFunction->use_end()); + for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); + UI != UE; ++UI) + if (CallInst* CI = dyn_cast<CallInst>(*UI)) + InlineFunction(CI); + else if (InvokeInst* II = dyn_cast<InvokeInst>(*UI)) + InlineFunction(II); + + // Ditch the duplicate, since we're done with it, and rewrite all remaining + // users (function pointers, etc.) back to the original function. + duplicateFunction->replaceAllUsesWith(F); + duplicateFunction->eraseFromParent(); + + return extractedFunction; +} + +bool PartialInliner::runOnModule(Module& M) { + std::vector<Function*> worklist; + worklist.reserve(M.size()); + for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) + if (!FI->use_empty() && !FI->isDeclaration()) + worklist.push_back(&*FI); + + bool changed = false; + while (!worklist.empty()) { + Function* currFunc = worklist.back(); + worklist.pop_back(); + + if (currFunc->use_empty()) continue; + + bool recursive = false; + for (Function::use_iterator UI = currFunc->use_begin(), + UE = currFunc->use_end(); UI != UE; ++UI) + if (Instruction* I = dyn_cast<Instruction>(UI)) + if (I->getParent()->getParent() == currFunc) { + recursive = true; + break; + } + if (recursive) continue; + + + if (Function* newFunc = unswitchFunction(currFunc)) { + worklist.push_back(newFunc); + changed = true; + } + + } + + return changed; +}
\ No newline at end of file diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 83503fd..38b1198 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -168,7 +168,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // Expand the code for the iteration count into the preheader of the loop. BasicBlock *Preheader = L->getLoopPreheader(); - Value *ExitCnt = Rewriter.expandCodeFor(RHS, CmpIndVar->getType(), + Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), Preheader->getTerminator()); // Insert a new icmp_ne or icmp_eq instruction before the branch. @@ -392,10 +392,31 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // in this loop, insert a canonical induction variable of the largest size. Value *IndVar = 0; if (NeedCannIV) { + // Check to see if the loop already has a canonical-looking induction + // variable. If one is present and it's wider than the planned canonical + // induction variable, temporarily remove it, so that the Rewriter + // doesn't attempt to reuse it. + PHINode *OldCannIV = L->getCanonicalInductionVariable(); + if (OldCannIV) { + if (SE->getTypeSizeInBits(OldCannIV->getType()) > + SE->getTypeSizeInBits(LargestType)) + OldCannIV->removeFromParent(); + else + OldCannIV = 0; + } + IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType); + ++NumInserted; Changed = true; DOUT << "INDVARS: New CanIV: " << *IndVar; + + // Now that the official induction variable is established, reinsert + // the old canonical-looking variable after it so that the IR remains + // consistent. It will be deleted as part of the dead-PHI deletion at + // the end of the pass. + if (OldCannIV) + OldCannIV->insertAfter(cast<Instruction>(IndVar)); } // If we have a trip count expression, rewrite the loop's exit condition @@ -459,8 +480,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, E = List.end(); UI != E; ++UI) { SCEVHandle Offset = UI->getOffset(); Value *Op = UI->getOperandValToReplace(); + const Type *UseTy = Op->getType(); Instruction *User = UI->getUser(); - bool isSigned = UI->isSigned(); // Compute the final addrec to expand into code. SCEVHandle AR = IU->getReplacementExpr(*UI); @@ -471,7 +492,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, // Expand loop-invariant values in the loop preheader. They will // be sunk to the exit block later, if possible. NewVal = - Rewriter.expandCodeFor(AR, LargestType, + Rewriter.expandCodeFor(AR, UseTy, L->getLoopPreheader()->getTerminator()); Rewriter.setInsertionPoint(I); ++NumReplaced; @@ -485,74 +506,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, if (!Stride->isLoopInvariant(L)) continue; - const Type *IVTy = Offset->getType(); - const Type *UseTy = Op->getType(); - - // Promote the Offset and Stride up to the canonical induction - // variable's bit width. - SCEVHandle PromotedOffset = Offset; - SCEVHandle PromotedStride = Stride; - if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) { - // It doesn't matter for correctness whether zero or sign extension - // is used here, since the value is truncated away below, but if the - // value is signed, sign extension is more likely to be folded. - if (isSigned) { - PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType); - PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType); - } else { - PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType); - // If the stride is obviously negative, use sign extension to - // produce things like x-1 instead of x+255. - if (isa<SCEVConstant>(PromotedStride) && - cast<SCEVConstant>(PromotedStride) - ->getValue()->getValue().isNegative()) - PromotedStride = SE->getSignExtendExpr(PromotedStride, - LargestType); - else - PromotedStride = SE->getZeroExtendExpr(PromotedStride, - LargestType); - } - } - - // Create the SCEV representing the offset from the canonical - // induction variable, still in the canonical induction variable's - // type, so that all expanded arithmetic is done in the same type. - SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType), - PromotedStride, L); - // Add the PromotedOffset as a separate step, because it may not be - // loop-invariant. - NewAR = SE->getAddExpr(NewAR, PromotedOffset); - - // Expand the addrec into instructions. - Value *V = Rewriter.expandCodeFor(NewAR); - - // Insert an explicit cast if necessary to truncate the value - // down to the original stride type. This is done outside of - // SCEVExpander because in SCEV expressions, a truncate of an - // addrec is always folded. - if (LargestType != IVTy) { - if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) - NewAR = SE->getTruncateExpr(NewAR, IVTy); - if (Rewriter.isInsertedExpression(NewAR)) - V = Rewriter.expandCodeFor(NewAR); - else { - V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false, - IVTy, false), - V, IVTy); - assert(!isa<SExtInst>(V) && !isa<ZExtInst>(V) && - "LargestType wasn't actually the largest type!"); - // Force the rewriter to use this trunc whenever this addrec - // appears so that it doesn't insert new phi nodes or - // arithmetic in a different type. - Rewriter.addInsertedValue(V, NewAR); - } - } - - DOUT << "INDVARS: Made offset-and-trunc IV for offset " - << *IVTy << " " << *Offset << ": "; - DEBUG(WriteAsOperand(*DOUT, V, false)); - DOUT << "\n"; - // Now expand it into actual Instructions and patch it into place. NewVal = Rewriter.expandCodeFor(AR, UseTy); } diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 6d2ff0e..5465e4a 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -2608,21 +2608,6 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName()); } - - if (Op1I->hasOneUse()) { - // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression - // is not used by anyone else... - // - if (Op1I->getOpcode() == Instruction::FSub) { - // Swap the two operands of the subexpr... - Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); - Op1I->setOperand(0, IIOp1); - Op1I->setOperand(1, IIOp0); - - // Create the new top level fadd instruction... - return BinaryOperator::CreateFAdd(Op0, Op1); - } - } } return 0; @@ -11824,7 +11809,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (SI.isVolatile()) return 0; // Don't hack volatile stores. // store X, null -> turns into 'unreachable' in SimplifyCFG - if (isa<ConstantPointerNull>(Ptr)) { + if (isa<ConstantPointerNull>(Ptr) && + cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) { if (!isa<UndefValue>(Val)) { SI.setOperand(0, UndefValue::get(Val->getType())); if (Instruction *U = dyn_cast<Instruction>(Val)) diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index b499279..5a85a04 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -125,13 +125,17 @@ static bool MarkAliveBlocks(BasicBlock *BB, } } - if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) - if (isa<ConstantPointerNull>(SI->getOperand(1)) || - isa<UndefValue>(SI->getOperand(1))) { + if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + Value *Ptr = SI->getOperand(1); + + if (isa<UndefValue>(Ptr) || + (isa<ConstantPointerNull>(Ptr) && + cast<PointerType>(Ptr->getType())->getAddressSpace() == 0)) { ChangeToUnreachable(SI); Changed = true; break; } + } } // Turn invokes that call 'nounwind' functions into ordinary calls. |