diff options
Diffstat (limited to 'lib/Transforms/IPO/MergeFunctions.cpp')
-rw-r--r-- | lib/Transforms/IPO/MergeFunctions.cpp | 646 |
1 files changed, 431 insertions, 215 deletions
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 5d838f9..cccffca 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -67,42 +67,87 @@ using namespace llvm; STATISTIC(NumFunctionsMerged, "Number of functions merged"); +STATISTIC(NumThunksWritten, "Number of thunks generated"); +STATISTIC(NumAliasesWritten, "Number of aliases generated"); +STATISTIC(NumDoubleWeak, "Number of new functions created"); + +/// Creates a hash-code for the function which is the same for any two +/// functions that will compare equal, without looking at the instructions +/// inside the function. +static unsigned profileFunction(const Function *F) { + const FunctionType *FTy = F->getFunctionType(); -namespace { - /// MergeFunctions finds functions which will generate identical machine code, - /// by considering all pointer types to be equivalent. Once identified, - /// MergeFunctions will fold them by replacing a call to one to a call to a - /// bitcast of the other. - /// - class MergeFunctions : public ModulePass { - public: - static char ID; - MergeFunctions() : ModulePass(ID) {} - - bool runOnModule(Module &M); - - private: - /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G - /// may be deleted, or may be converted into a thunk. In either case, it - /// should never be visited again. - void MergeTwoFunctions(Function *F, Function *G) const; - - /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also - /// replace direct uses of G with bitcast(F). - void WriteThunk(Function *F, Function *G) const; - - TargetData *TD; - }; + FoldingSetNodeID ID; + ID.AddInteger(F->size()); + ID.AddInteger(F->getCallingConv()); + ID.AddBoolean(F->hasGC()); + ID.AddBoolean(FTy->isVarArg()); + ID.AddInteger(FTy->getReturnType()->getTypeID()); + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + ID.AddInteger(FTy->getParamType(i)->getTypeID()); + return ID.ComputeHash(); } -char MergeFunctions::ID = 0; -INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false); +namespace { + +/// ComparableFunction - A struct that pairs together functions with a +/// TargetData so that we can keep them together as elements in the DenseSet. +class ComparableFunction { +public: + static const ComparableFunction EmptyKey; + static const ComparableFunction TombstoneKey; + static TargetData * const LookupOnly; + + ComparableFunction(Function *Func, TargetData *TD) + : Func(Func), Hash(profileFunction(Func)), TD(TD) {} + + Function *getFunc() const { return Func; } + unsigned getHash() const { return Hash; } + TargetData *getTD() const { return TD; } + + // Drops AssertingVH reference to the function. Outside of debug mode, this + // does nothing. + void release() { + assert(Func && + "Attempted to release function twice, or release empty/tombstone!"); + Func = NULL; + } + +private: + explicit ComparableFunction(unsigned Hash) + : Func(NULL), Hash(Hash), TD(NULL) {} + + AssertingVH<Function> Func; + unsigned Hash; + TargetData *TD; +}; + +const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); +const ComparableFunction ComparableFunction::TombstoneKey = + ComparableFunction(1); +TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1); -ModulePass *llvm::createMergeFunctionsPass() { - return new MergeFunctions(); +} + +namespace llvm { + template <> + struct DenseMapInfo<ComparableFunction> { + static ComparableFunction getEmptyKey() { + return ComparableFunction::EmptyKey; + } + static ComparableFunction getTombstoneKey() { + return ComparableFunction::TombstoneKey; + } + static unsigned getHashValue(const ComparableFunction &CF) { + return CF.getHash(); + } + static bool isEqual(const ComparableFunction &LHS, + const ComparableFunction &RHS); + }; } namespace { + /// FunctionComparator - Compares two functions to determine whether or not /// they will generate machine code with the same behaviour. TargetData is /// used if available. The comparator always fails conservatively (erring on the @@ -111,34 +156,34 @@ class FunctionComparator { public: FunctionComparator(const TargetData *TD, const Function *F1, const Function *F2) - : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {} + : F1(F1), F2(F2), TD(TD) {} - /// Compare - test whether the two functions have equivalent behaviour. - bool Compare(); + /// Test whether the two functions have equivalent behaviour. + bool compare(); private: - /// Compare - test whether two basic blocks have equivalent behaviour. - bool Compare(const BasicBlock *BB1, const BasicBlock *BB2); + /// Test whether two basic blocks have equivalent behaviour. + bool compare(const BasicBlock *BB1, const BasicBlock *BB2); - /// Enumerate - Assign or look up previously assigned numbers for the two - /// values, and return whether the numbers are equal. Numbers are assigned in - /// the order visited. - bool Enumerate(const Value *V1, const Value *V2); + /// Assign or look up previously assigned numbers for the two values, and + /// return whether the numbers are equal. Numbers are assigned in the order + /// visited. + bool enumerate(const Value *V1, const Value *V2); - /// isEquivalentOperation - Compare two Instructions for equivalence, similar - /// to Instruction::isSameOperationAs but with modifications to the type + /// Compare two Instructions for equivalence, similar to + /// Instruction::isSameOperationAs but with modifications to the type /// comparison. bool isEquivalentOperation(const Instruction *I1, const Instruction *I2) const; - /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic. + /// Compare two GEPs for equivalent pointer arithmetic. bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2); bool isEquivalentGEP(const GetElementPtrInst *GEP1, const GetElementPtrInst *GEP2) { return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2)); } - /// isEquivalentType - Compare two Types, treating all pointer types as equal. + /// Compare two Types, treating all pointer types as equal. bool isEquivalentType(const Type *Ty1, const Type *Ty2) const; // The two functions undergoing comparison. @@ -146,20 +191,26 @@ private: const TargetData *TD; - typedef DenseMap<const Value *, unsigned long> IDMap; - IDMap Map1, Map2; - unsigned long IDMap1Count, IDMap2Count; + DenseMap<const Value *, const Value *> id_map; + DenseSet<const Value *> seen_values; }; + } -/// isEquivalentType - any two pointers in the same address space are -/// equivalent. Otherwise, standard type equivalence rules apply. +// Any two pointers in the same address space are equivalent, intptr_t and +// pointers are equivalent. Otherwise, standard type equivalence rules apply. bool FunctionComparator::isEquivalentType(const Type *Ty1, const Type *Ty2) const { if (Ty1 == Ty2) return true; - if (Ty1->getTypeID() != Ty2->getTypeID()) + if (Ty1->getTypeID() != Ty2->getTypeID()) { + if (TD) { + LLVMContext &Ctx = Ty1->getContext(); + if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true; + if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true; + } return false; + } switch(Ty1->getTypeID()) { default: @@ -167,6 +218,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, // Fall through in Release mode. case Type::IntegerTyID: case Type::OpaqueTyID: + case Type::VectorTyID: // Ty1 == Ty2 would have returned true earlier. return false; @@ -225,21 +277,18 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, return ATy1->getNumElements() == ATy2->getNumElements() && isEquivalentType(ATy1->getElementType(), ATy2->getElementType()); } - - case Type::VectorTyID: { - const VectorType *VTy1 = cast<VectorType>(Ty1); - const VectorType *VTy2 = cast<VectorType>(Ty2); - return VTy1->getNumElements() == VTy2->getNumElements() && - isEquivalentType(VTy1->getElementType(), VTy2->getElementType()); - } } } -/// isEquivalentOperation - determine whether the two operations are the same -/// except that pointer-to-A and pointer-to-B are equivalent. This should be -/// kept in sync with Instruction::isSameOperationAs. +// Determine whether the two operations are the same except that pointer-to-A +// and pointer-to-B are equivalent. This should be kept in sync with +// Instruction::isSameOperationAs. bool FunctionComparator::isEquivalentOperation(const Instruction *I1, const Instruction *I2) const { + // Differences from Instruction::isSameOperationAs: + // * replace type comparison with calls to isEquivalentType. + // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top + // * because of the above, we don't test for the tail bit on calls later on if (I1->getOpcode() != I2->getOpcode() || I1->getNumOperands() != I2->getNumOperands() || !isEquivalentType(I1->getType(), I2->getType()) || @@ -263,14 +312,11 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); if (const CallInst *CI = dyn_cast<CallInst>(I1)) - return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() && - CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && - CI->getAttributes().getRawPointer() == - cast<CallInst>(I2)->getAttributes().getRawPointer(); + return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && + CI->getAttributes() == cast<CallInst>(I2)->getAttributes(); if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && - CI->getAttributes().getRawPointer() == - cast<InvokeInst>(I2)->getAttributes().getRawPointer(); + CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes(); if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) { if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices()) return false; @@ -291,8 +337,7 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, return true; } -/// isEquivalentGEP - determine whether two GEP operations perform the same -/// underlying arithmetic. +// Determine whether two GEP operations perform the same underlying arithmetic. bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) { // When we have target data, we can reduce the GEP down to the value in bytes @@ -315,17 +360,17 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, return false; for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { - if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) + if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) return false; } return true; } -/// Enumerate - Compare two values used by the two functions under pair-wise -/// comparison. If this is the first time the values are seen, they're added to -/// the mapping so that we will detect mismatches on next use. -bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) { +// Compare two values used by the two functions under pair-wise comparison. If +// this is the first time the values are seen, they're added to the mapping so +// that we will detect mismatches on next use. +bool FunctionComparator::enumerate(const Value *V1, const Value *V2) { // Check for function @f1 referring to itself and function @f2 referring to // itself, or referring to each other, or both referring to either of them. // They're all equivalent if the two functions are otherwise equivalent. @@ -334,35 +379,44 @@ bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) { if (V1 == F2 && V2 == F1) return true; - // TODO: constant expressions with GEP or references to F1 or F2. - if (isa<Constant>(V1)) - return V1 == V2; - - if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) { - const InlineAsm *IA1 = cast<InlineAsm>(V1); - const InlineAsm *IA2 = cast<InlineAsm>(V2); - return IA1->getAsmString() == IA2->getAsmString() && - IA1->getConstraintString() == IA2->getConstraintString(); + if (const Constant *C1 = dyn_cast<Constant>(V1)) { + if (V1 == V2) return true; + const Constant *C2 = dyn_cast<Constant>(V2); + if (!C2) return false; + // TODO: constant expressions with GEP or references to F1 or F2. + if (C1->isNullValue() && C2->isNullValue() && + isEquivalentType(C1->getType(), C2->getType())) + return true; + // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 + // then they must have equal bit patterns. + return C1->getType()->canLosslesslyBitCastTo(C2->getType()) && + C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType()); } - unsigned long &ID1 = Map1[V1]; - if (!ID1) - ID1 = ++IDMap1Count; + if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2)) + return V1 == V2; - unsigned long &ID2 = Map2[V2]; - if (!ID2) - ID2 = ++IDMap2Count; + // Check that V1 maps to V2. If we find a value that V1 maps to then we simply + // check whether it's equal to V2. When there is no mapping then we need to + // ensure that V2 isn't already equivalent to something else. For this + // purpose, we track the V2 values in a set. - return ID1 == ID2; + const Value *&map_elem = id_map[V1]; + if (map_elem) + return map_elem == V2; + if (!seen_values.insert(V2).second) + return false; + map_elem = V2; + return true; } -/// Compare - test whether two basic blocks have equivalent behaviour. -bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { +// Test whether two basic blocks have equivalent behaviour. +bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) { BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end(); BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end(); do { - if (!Enumerate(F1I, F2I)) + if (!enumerate(F1I, F2I)) return false; if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) { @@ -370,7 +424,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { if (!GEP2) return false; - if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) + if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) return false; if (!isEquivalentGEP(GEP1, GEP2)) @@ -384,7 +438,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { Value *OpF1 = F1I->getOperand(i); Value *OpF2 = F2I->getOperand(i); - if (!Enumerate(OpF1, OpF2)) + if (!enumerate(OpF1, OpF2)) return false; if (OpF1->getValueID() != OpF2->getValueID() || @@ -399,8 +453,8 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { return F1I == F1E && F2I == F2E; } -/// Compare - test whether the two functions have equivalent behaviour. -bool FunctionComparator::Compare() { +// Test whether the two functions have equivalent behaviour. +bool FunctionComparator::compare() { // We need to recheck everything, but check the things that weren't included // in the hash first. @@ -431,14 +485,14 @@ bool FunctionComparator::Compare() { return false; assert(F1->arg_size() == F2->arg_size() && - "Identical functions have a different number of args."); + "Identically typed functions have different numbers of args!"); // Visit the arguments so that they get enumerated in the order they're // passed in. for (Function::const_arg_iterator f1i = F1->arg_begin(), f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) { - if (!Enumerate(f1i, f2i)) - llvm_unreachable("Arguments repeat"); + if (!enumerate(f1i, f2i)) + llvm_unreachable("Arguments repeat!"); } // We do a CFG-ordered walk since the actual ordering of the blocks in the @@ -456,7 +510,7 @@ bool FunctionComparator::Compare() { const BasicBlock *F1BB = F1BBs.pop_back_val(); const BasicBlock *F2BB = F2BBs.pop_back_val(); - if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB)) + if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB)) return false; const TerminatorInst *F1TI = F1BB->getTerminator(); @@ -474,23 +528,190 @@ bool FunctionComparator::Compare() { return true; } -/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace -/// direct uses of G with bitcast(F). -void MergeFunctions::WriteThunk(Function *F, Function *G) const { +namespace { + +/// MergeFunctions finds functions which will generate identical machine code, +/// by considering all pointer types to be equivalent. Once identified, +/// MergeFunctions will fold them by replacing a call to one to a call to a +/// bitcast of the other. +/// +class MergeFunctions : public ModulePass { +public: + static char ID; + MergeFunctions() + : ModulePass(ID), HasGlobalAliases(false) { + initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M); + +private: + typedef DenseSet<ComparableFunction> FnSetType; + + /// A work queue of functions that may have been modified and should be + /// analyzed again. + std::vector<WeakVH> Deferred; + + /// Insert a ComparableFunction into the FnSet, or merge it away if it's + /// equal to one that's already present. + bool insert(ComparableFunction &NewF); + + /// Remove a Function from the FnSet and queue it up for a second sweep of + /// analysis. + void remove(Function *F); + + /// Find the functions that use this Value and remove them from FnSet and + /// queue the functions. + void removeUsers(Value *V); + + /// Replace all direct calls of Old with calls of New. Will bitcast New if + /// necessary to make types match. + void replaceDirectCallers(Function *Old, Function *New); + + /// Merge two equivalent functions. Upon completion, G may be deleted, or may + /// be converted into a thunk. In either case, it should never be visited + /// again. + void mergeTwoFunctions(Function *F, Function *G); + + /// Replace G with a thunk or an alias to F. Deletes G. + void writeThunkOrAlias(Function *F, Function *G); + + /// Replace G with a simple tail call to bitcast(F). Also replace direct uses + /// of G with bitcast(F). Deletes G. + void writeThunk(Function *F, Function *G); + + /// Replace G with an alias to F. Deletes G. + void writeAlias(Function *F, Function *G); + + /// The set of all distinct functions. Use the insert() and remove() methods + /// to modify it. + FnSetType FnSet; + + /// TargetData for more accurate GEP comparisons. May be NULL. + TargetData *TD; + + /// Whether or not the target supports global aliases. + bool HasGlobalAliases; +}; + +} // end anonymous namespace + +char MergeFunctions::ID = 0; +INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) + +ModulePass *llvm::createMergeFunctionsPass() { + return new MergeFunctions(); +} + +bool MergeFunctions::runOnModule(Module &M) { + bool Changed = false; + TD = getAnalysisIfAvailable<TargetData>(); + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) + Deferred.push_back(WeakVH(I)); + } + FnSet.resize(Deferred.size()); + + do { + std::vector<WeakVH> Worklist; + Deferred.swap(Worklist); + + DEBUG(dbgs() << "size of module: " << M.size() << '\n'); + DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); + + // Insert only strong functions and merge them. Strong function merging + // always deletes one of them. + for (std::vector<WeakVH>::iterator I = Worklist.begin(), + E = Worklist.end(); I != E; ++I) { + if (!*I) continue; + Function *F = cast<Function>(*I); + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && + !F->mayBeOverridden()) { + ComparableFunction CF = ComparableFunction(F, TD); + Changed |= insert(CF); + } + } + + // Insert only weak functions and merge them. By doing these second we + // create thunks to the strong function when possible. When two weak + // functions are identical, we create a new strong function with two weak + // weak thunks to it which are identical but not mergable. + for (std::vector<WeakVH>::iterator I = Worklist.begin(), + E = Worklist.end(); I != E; ++I) { + if (!*I) continue; + Function *F = cast<Function>(*I); + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && + F->mayBeOverridden()) { + ComparableFunction CF = ComparableFunction(F, TD); + Changed |= insert(CF); + } + } + DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n'); + } while (!Deferred.empty()); + + FnSet.clear(); + + return Changed; +} + +bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS, + const ComparableFunction &RHS) { + if (LHS.getFunc() == RHS.getFunc() && + LHS.getHash() == RHS.getHash()) + return true; + if (!LHS.getFunc() || !RHS.getFunc()) + return false; + + // One of these is a special "underlying pointer comparison only" object. + if (LHS.getTD() == ComparableFunction::LookupOnly || + RHS.getTD() == ComparableFunction::LookupOnly) + return false; + + assert(LHS.getTD() == RHS.getTD() && + "Comparing functions for different targets"); + + return FunctionComparator(LHS.getTD(), LHS.getFunc(), + RHS.getFunc()).compare(); +} + +// Replace direct callers of Old with New. +void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { + Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType()); + for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); + UI != UE;) { + Value::use_iterator TheIter = UI; + ++UI; + CallSite CS(*TheIter); + if (CS && CS.isCallee(TheIter)) { + remove(CS.getInstruction()->getParent()->getParent()); + TheIter.getUse().set(BitcastNew); + } + } +} + +// Replace G with an alias to F if possible, or else a thunk to F. Deletes G. +void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) { + if (HasGlobalAliases && G->hasUnnamedAddr()) { + if (G->hasExternalLinkage() || G->hasLocalLinkage() || + G->hasWeakLinkage()) { + writeAlias(F, G); + return; + } + } + + writeThunk(F, G); +} + +// Replace G with a simple tail call to bitcast(F). Also replace direct uses +// of G with bitcast(F). Deletes G. +void MergeFunctions::writeThunk(Function *F, Function *G) { if (!G->mayBeOverridden()) { // Redirect direct callers of G to F. - Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); - for (Value::use_iterator UI = G->use_begin(), UE = G->use_end(); - UI != UE;) { - Value::use_iterator TheIter = UI; - ++UI; - CallSite CS(*TheIter); - if (CS && CS.isCallee(TheIter)) - TheIter.getUse().set(BitcastF); - } + replaceDirectCallers(G, F); } - // If G was internal then we may have replaced all uses if G with F. If so, + // If G was internal then we may have replaced all uses of G with F. If so, // stop here and delete G. There's no need for a thunk. if (G->hasLocalLinkage() && G->use_empty()) { G->eraseFromParent(); @@ -522,131 +743,126 @@ void MergeFunctions::WriteThunk(Function *F, Function *G) const { NewG->copyAttributesFrom(G); NewG->takeName(G); + removeUsers(G); G->replaceAllUsesWith(NewG); G->eraseFromParent(); + + DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n'); + ++NumThunksWritten; } -/// MergeTwoFunctions - Merge two equivalent functions. Upon completion, -/// Function G is deleted. -void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const { - if (F->isWeakForLinker()) { - assert(G->isWeakForLinker()); +// Replace G with an alias to F and delete G. +void MergeFunctions::writeAlias(Function *F, Function *G) { + Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "", + BitcastF, G->getParent()); + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + GA->takeName(G); + GA->setVisibility(G->getVisibility()); + removeUsers(G); + G->replaceAllUsesWith(GA); + G->eraseFromParent(); + + DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n'); + ++NumAliasesWritten; +} + +// Merge two equivalent functions. Upon completion, Function G is deleted. +void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { + if (F->mayBeOverridden()) { + assert(G->mayBeOverridden()); + + if (HasGlobalAliases) { + // Make them both thunks to the same internal function. + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + removeUsers(F); + F->replaceAllUsesWith(H); - // Make them both thunks to the same internal function. - Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", - F->getParent()); - H->copyAttributesFrom(F); - H->takeName(F); - F->replaceAllUsesWith(H); + unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); - unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); + writeAlias(F, G); + writeAlias(F, H); - WriteThunk(F, G); - WriteThunk(F, H); + F->setAlignment(MaxAlignment); + F->setLinkage(GlobalValue::PrivateLinkage); + } else { + // We can't merge them. Instead, pick one and update all direct callers + // to call it and hope that we improve the instruction cache hit rate. + replaceDirectCallers(G, F); + } - F->setAlignment(MaxAlignment); - F->setLinkage(GlobalValue::InternalLinkage); + ++NumDoubleWeak; } else { - WriteThunk(F, G); + writeThunkOrAlias(F, G); } ++NumFunctionsMerged; } -static unsigned ProfileFunction(const Function *F) { - const FunctionType *FTy = F->getFunctionType(); - - FoldingSetNodeID ID; - ID.AddInteger(F->size()); - ID.AddInteger(F->getCallingConv()); - ID.AddBoolean(F->hasGC()); - ID.AddBoolean(FTy->isVarArg()); - ID.AddInteger(FTy->getReturnType()->getTypeID()); - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - ID.AddInteger(FTy->getParamType(i)->getTypeID()); - return ID.ComputeHash(); -} - -class ComparableFunction { -public: - ComparableFunction(Function *Func, TargetData *TD) - : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {} +// Insert a ComparableFunction into the FnSet, or merge it away if equal to one +// that was already inserted. +bool MergeFunctions::insert(ComparableFunction &NewF) { + std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); + if (Result.second) { + DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n'); + return false; + } - AssertingVH<Function> const Func; - const unsigned Hash; - TargetData * const TD; -}; + const ComparableFunction &OldF = *Result.first; -struct MergeFunctionsEqualityInfo { - static ComparableFunction *getEmptyKey() { - return reinterpret_cast<ComparableFunction*>(0); - } - static ComparableFunction *getTombstoneKey() { - return reinterpret_cast<ComparableFunction*>(-1); - } - static unsigned getHashValue(const ComparableFunction *CF) { - return CF->Hash; - } - static bool isEqual(const ComparableFunction *LHS, - const ComparableFunction *RHS) { - if (LHS == RHS) - return true; - if (LHS == getEmptyKey() || LHS == getTombstoneKey() || - RHS == getEmptyKey() || RHS == getTombstoneKey()) - return false; - assert(LHS->TD == RHS->TD && "Comparing functions for different targets"); - return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare(); - } -}; + // Never thunk a strong function to a weak function. + assert(!OldF.getFunc()->mayBeOverridden() || + NewF.getFunc()->mayBeOverridden()); -bool MergeFunctions::runOnModule(Module &M) { - typedef DenseSet<ComparableFunction *, MergeFunctionsEqualityInfo> FnSetType; + DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " + << NewF.getFunc()->getName() << '\n'); - bool Changed = false; - TD = getAnalysisIfAvailable<TargetData>(); + Function *DeleteF = NewF.getFunc(); + NewF.release(); + mergeTwoFunctions(OldF.getFunc(), DeleteF); + return true; +} - std::vector<Function *> Funcs; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) - Funcs.push_back(F); +// Remove a function from FnSet. If it was already in FnSet, add it to Deferred +// so that we'll look at it in the next round. +void MergeFunctions::remove(Function *F) { + // We need to make sure we remove F, not a function "equal" to F per the + // function equality comparator. + // + // The special "lookup only" ComparableFunction bypasses the expensive + // function comparison in favour of a pointer comparison on the underlying + // Function*'s. + ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly); + if (FnSet.erase(CF)) { + DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n"); + Deferred.push_back(F); } +} - bool LocalChanged; - do { - LocalChanged = false; - - FnSetType FnSet; - for (unsigned i = 0, e = Funcs.size(); i != e;) { - Function *F = Funcs[i]; - ComparableFunction *NewF = new ComparableFunction(F, TD); - std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); - if (!Result.second) { - ComparableFunction *&OldF = *Result.first; - assert(OldF && "Expected a hash collision"); - - // NewF will be deleted in favour of OldF unless NewF is strong and - // OldF is weak in which case swap them to keep the strong definition. - - if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker()) - std::swap(OldF, NewF); - - DEBUG(dbgs() << " " << OldF->Func->getName() << " == " - << NewF->Func->getName() << '\n'); - - Funcs.erase(Funcs.begin() + i); - --e; - - Function *DeleteF = NewF->Func; - delete NewF; - MergeTwoFunctions(OldF->Func, DeleteF); - LocalChanged = true; - Changed = true; - } else { - ++i; +// For each instruction used by the value, remove() the function that contains +// the instruction. This should happen right before a call to RAUW. +void MergeFunctions::removeUsers(Value *V) { + std::vector<Value *> Worklist; + Worklist.push_back(V); + while (!Worklist.empty()) { + Value *V = Worklist.back(); + Worklist.pop_back(); + + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + Use &U = UI.getUse(); + if (Instruction *I = dyn_cast<Instruction>(U.getUser())) { + remove(I->getParent()->getParent()); + } else if (isa<GlobalValue>(U.getUser())) { + // do nothing + } else if (Constant *C = dyn_cast<Constant>(U.getUser())) { + for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end(); + CUI != CUE; ++CUI) + Worklist.push_back(*CUI); } } - DeleteContainerPointers(FnSet); - } while (LocalChanged); - - return Changed; + } } |