Diffstat (limited to 'lib/Transforms')
72 files changed, 3037 insertions, 1693 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 0c650cf..54a7f67 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -771,8 +771,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index efdeec5..179b150 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -20,5 +20,4 @@ add_llvm_library(LLVMipo PruneEH.cpp StripDeadPrototypes.cpp StripSymbols.cpp - StructRetPromotion.cpp ) diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index b423221..d4eaf0c 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -49,7 +49,7 @@ namespace { /// Struct that represents (part of) either a return value or a function /// argument. Used so that arguments and return values can be used - /// interchangably. + /// interchangeably. struct RetOrArg { RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), IsArg(IsArg) {} @@ -273,8 +273,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // function empty. NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. While we're at + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. While we're at // it, remove the dead arguments from the DeadArguments list. // for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), @@ -294,7 +294,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { /// instead. bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) { - if (Fn.isDeclaration()) + if (Fn.isDeclaration() || Fn.mayBeOverridden()) return false; // Functions with local linkage should already have been handled. @@ -379,7 +379,7 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U, // The value is returned from a function. It's only live when the // function's return value is live. We use RetValNum here, for the case // that U is really a use of an insertvalue instruction that uses the - // orginal Use. + // original Use. RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum); // We might be live, depending on the liveness of Use. return MarkIfNotLive(Use, MaybeLiveUses); @@ -894,8 +894,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. 
i = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++i) diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index d4cb712..ded58ac 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -21,6 +21,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -54,6 +55,7 @@ STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); STATISTIC(NumNestRemoved , "Number of nest attributes removed"); STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); +STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); namespace { struct GlobalStatus; @@ -77,6 +79,7 @@ namespace { bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, const SmallPtrSet<const PHINode*, 16> &PHIUsers, const GlobalStatus &GS); + bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); }; } @@ -1191,9 +1194,11 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, const StructType *ST = cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); - Result = + PHINode *NewPN = PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PN->getNumIncomingValues(), PN->getName()+".f"+Twine(FieldNo), PN); + Result = NewPN; PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); } else { llvm_unreachable("Unknown usable value"); @@ -1940,36 +1945,24 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { return Changed; } -/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all +/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all /// initializers have an init priority of 65535. GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); if (GV == 0) return 0; - // Found it, verify it's an array of { int, void()* }. - const ArrayType *ATy =dyn_cast<ArrayType>(GV->getType()->getElementType()); - if (!ATy) return 0; - const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); - if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isIntegerTy(32)) return 0; - const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); - if (!PFTy) return 0; - const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); - if (!FTy || !FTy->getReturnType()->isVoidTy() || - FTy->isVarArg() || FTy->getNumParams() != 0) - return 0; - // Verify that the initializer is simple enough for us to handle. We are // only allowed to optimize the initializer if it is unique. if (!GV->hasUniqueInitializer()) return 0; - - ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); - if (!CA) return 0; - + + if (isa<ConstantAggregateZero>(GV->getInitializer())) + return GV; + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { - ConstantStruct *CS = dyn_cast<ConstantStruct>(*i); - if (CS == 0) return 0; - + if (isa<ConstantAggregateZero>(*i)) + continue; + ConstantStruct *CS = cast<ConstantStruct>(*i); if (isa<ConstantPointerNull>(CS->getOperand(1))) continue; @@ -1978,8 +1971,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { return 0; // Init priority must be standard. 
- ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); - if (!CI || CI->getZExtValue() != 65535) + ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) return 0; } @@ -1989,6 +1982,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, /// return a list of the functions and null terminator as a vector. static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { + if (GV->getInitializer()->isNullValue()) + return std::vector<Function*>(); ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); std::vector<Function*> Result; Result.reserve(CA->getNumOperands()); @@ -2019,7 +2014,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, const PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), - 2147483647); + 0x7fffffff); } CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); } @@ -2696,12 +2691,126 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { return Changed; } +static Function *FindCXAAtExit(Module &M) { + Function *Fn = M.getFunction("__cxa_atexit"); + + if (!Fn) + return 0; + + const FunctionType *FTy = Fn->getFunctionType(); + + // Checking that the function has the right return type, the right number of + // parameters and that they all have pointer types should be enough. + if (!FTy->getReturnType()->isIntegerTy() || + FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return 0; + + return Fn; +} + +/// cxxDtorIsEmpty - Returns whether the given function is an empty C++ +/// destructor and can therefore be eliminated. +/// Note that we assume that other optimization passes have already simplified +/// the code so we only look for a function with a single basic block, where +/// the only allowed instructions are 'ret' or 'call' to empty C++ dtor. +static bool cxxDtorIsEmpty(const Function &Fn, + SmallPtrSet<const Function *, 8> &CalledFunctions) { + // FIXME: We could eliminate C++ destructors if they're readonly/readnone and + // nounwind, but that doesn't seem worth doing. + if (Fn.isDeclaration()) + return false; + + if (++Fn.begin() != Fn.end()) + return false; + + const BasicBlock &EntryBlock = Fn.getEntryBlock(); + for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end(); + I != E; ++I) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + // Ignore debug intrinsics. + if (isa<DbgInfoIntrinsic>(CI)) + continue; + + const Function *CalledFn = CI->getCalledFunction(); + + if (!CalledFn) + return false; + + SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions); + + // Don't treat recursive functions as empty. + if (!NewCalledFunctions.insert(CalledFn)) + return false; + + if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)) + return false; + } else if (isa<ReturnInst>(*I)) + return true; + else + return false; + } + + return false; +} + +bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { + /// Itanium C++ ABI p3.3.5: + /// + /// After constructing a global (or local static) object, that will require + /// destruction on exit, a termination function is registered as follows: + /// + /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); + /// + /// This registration, e.g. 
__cxa_atexit(f,p,d), is intended to cause the + /// call f(p) when DSO d is unloaded, before all such termination calls + /// registered before this one. It returns zero if registration is + /// successful, nonzero on failure. + + // This pass will look for calls to __cxa_atexit where the function is trivial + // and remove them. + bool Changed = false; + + for (Function::use_iterator I = CXAAtExitFn->use_begin(), + E = CXAAtExitFn->use_end(); I != E;) { + // We're only interested in calls. Theoretically, we could handle invoke + // instructions as well, but neither llvm-gcc nor clang generate invokes + // to __cxa_atexit. + CallInst *CI = dyn_cast<CallInst>(*I++); + if (!CI) + continue; + + Function *DtorFn = + dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); + if (!DtorFn) + continue; + + SmallPtrSet<const Function *, 8> CalledFunctions; + if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions)) + continue; + + // Just remove the call. + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + CI->eraseFromParent(); + + ++NumCXXDtorsRemoved; + + Changed |= true; + } + + return Changed; +} + bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); + Function *CXAAtExitFn = FindCXAAtExit(M); + bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -2718,6 +2827,11 @@ bool GlobalOpt::runOnModule(Module &M) { // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M); + + // Try to remove trivial global destructors. + if (CXAAtExitFn) + LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); + Changed |= LocalChange; } diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index c7c2939..25c0134 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -186,7 +186,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { // Find the returned value Value *V; if (!STy) - V = RI->getOperand(i); + V = RI->getOperand(0); else V = FindInsertedValue(RI->getOperand(0), i); diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index fbe90ce..21dcb51 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -45,7 +45,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeStripDebugDeclarePass(Registry); initializeStripDeadDebugInfoPass(Registry); initializeStripNonDebugSymbolsPass(Registry); - initializeSRETPromotionPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 37eafd7..57f3e77 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -29,7 +29,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include <set> using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 9b9ebad..7cb1d18 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -126,6 +126,8 @@ bool InternalizePass::runOnModule(Module &M) { // FIXME: maybe use private linkage? for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && // Function must be defined here + // Available externally is really just a "declaration with a body". 
+ !I->hasAvailableExternallyLinkage() && !I->hasLocalLinkage() && // Can't already have internal linkage !ExternalNames.count(I->getName())) {// Not marked to keep external? I->setLinkage(GlobalValue::InternalLinkage); @@ -144,9 +146,6 @@ bool InternalizePass::runOnModule(Module &M) { // Never internalize anchors used by the machine module info, else the info // won't find them. (see MachineModuleInfo.) - ExternalNames.insert("llvm.dbg.compile_units"); - ExternalNames.insert("llvm.dbg.global_variables"); - ExternalNames.insert("llvm.dbg.subprograms"); ExternalNames.insert("llvm.global_ctors"); ExternalNames.insert("llvm.global_dtors"); ExternalNames.insert("llvm.noinline"); diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index b545f0b..52ecf17 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -430,7 +430,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) // This PHI node will be in the new block created from the // splitBasicBlock call. - PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), + PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2, "SetJmpReturn", Inst); // Coming from a call to setjmp, the return is 0. diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index cccffca..f741443 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -55,6 +55,7 @@ #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" @@ -125,7 +126,7 @@ private: const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); const ComparableFunction ComparableFunction::TombstoneKey = ComparableFunction(1); -TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1); +TargetData *const ComparableFunction::LookupOnly = (TargetData*)(-1); } @@ -212,7 +213,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, return false; } - switch(Ty1->getTypeID()) { + switch (Ty1->getTypeID()) { default: llvm_unreachable("Unknown type!"); // Fall through in Release mode. 
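For context on the new OptimizeEmptyGlobalCXXDtors transformation added to GlobalOpt.cpp above, here is a minimal C++ sketch of the source pattern it targets. The type Widget and the spelled-out registration are illustrative assumptions, not code from the patch; the front end emits the __cxa_atexit call itself for a global with a non-trivial destructor.

    // Assumed example: a global whose destructor ends up empty after
    // inlining/simplification.
    struct Widget {
      ~Widget() {}   // empty destructor body
    };

    Widget G;        // global object requiring destruction at exit

    // Under the Itanium C++ ABI the front end registers the destructor via
    //   extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
    // roughly as __cxa_atexit(<dtor thunk for G>, &G, &__dso_handle).
    // Because the thunk only reaches the empty ~Widget(), cxxDtorIsEmpty()
    // accepts it, so the pass deletes the __cxa_atexit call and replaces its
    // result with a null (zero) value, as the code above does.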
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 2afd029..d9d1d10 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -95,7 +95,7 @@ Function* PartialInliner::unswitchFunction(Function* F) { PHINode* OldPhi = dyn_cast<PHINode>(I); if (!OldPhi) break; - PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins); + PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); OldPhi->replaceAllUsesWith(retPhi); Ins = newReturnBlock->getFirstNonPHI(); diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index d91c2c4..9470180 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -27,7 +27,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" -#include <set> #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp deleted file mode 100644 index 584deac..0000000 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ /dev/null @@ -1,357 +0,0 @@ -//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass finds functions that return a struct (using a pointer to the struct -// as the first argument of the function, marked with the 'sret' attribute) and -// replaces them with a new function that simply returns each of the elements of -// that struct (using multiple return values). -// -// This pass works under a number of conditions: -// 1. The returned struct must not contain other structs -// 2. The returned struct must only be used to load values from -// 3. The placeholder struct passed in is the result of an alloca -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "sretpromotion" -#include "llvm/Transforms/IPO.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/CallGraphSCCPass.h" -#include "llvm/Instructions.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses"); -STATISTIC(NumSRET , "Number of sret promoted"); -namespace { - /// SRETPromotion - This pass removes sret parameter and updates - /// function to use multiple return value. 
- /// - struct SRETPromotion : public CallGraphSCCPass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - CallGraphSCCPass::getAnalysisUsage(AU); - } - - virtual bool runOnSCC(CallGraphSCC &SCC); - static char ID; // Pass identification, replacement for typeid - SRETPromotion() : CallGraphSCCPass(ID) { - initializeSRETPromotionPass(*PassRegistry::getPassRegistry()); - } - - private: - CallGraphNode *PromoteReturn(CallGraphNode *CGN); - bool isSafeToUpdateAllCallers(Function *F); - Function *cloneFunctionBody(Function *F, const StructType *STy); - CallGraphNode *updateCallSites(Function *F, Function *NF); - }; -} - -char SRETPromotion::ID = 0; -INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion", - "Promote sret arguments to multiple ret values", false, false) -INITIALIZE_AG_DEPENDENCY(CallGraph) -INITIALIZE_PASS_END(SRETPromotion, "sretpromotion", - "Promote sret arguments to multiple ret values", false, false) - -Pass *llvm::createStructRetPromotionPass() { - return new SRETPromotion(); -} - -bool SRETPromotion::runOnSCC(CallGraphSCC &SCC) { - bool Changed = false; - - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - if (CallGraphNode *NewNode = PromoteReturn(*I)) { - SCC.ReplaceNode(*I, NewNode); - Changed = true; - } - - return Changed; -} - -/// PromoteReturn - This method promotes function that uses StructRet paramater -/// into a function that uses multiple return values. -CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { - Function *F = CGN->getFunction(); - - if (!F || F->isDeclaration() || !F->hasLocalLinkage()) - return 0; - - // Make sure that function returns struct. - if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn()) - return 0; - - DEBUG(dbgs() << "SretPromotion: Looking at sret function " - << F->getName() << "\n"); - - assert(F->getReturnType()->isVoidTy() && "Invalid function return type"); - Function::arg_iterator AI = F->arg_begin(); - const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType()); - assert(FArgType && "Invalid sret parameter type"); - const llvm::StructType *STy = - dyn_cast<StructType>(FArgType->getElementType()); - assert(STy && "Invalid sret parameter element type"); - - // Check if it is ok to perform this promotion. - if (isSafeToUpdateAllCallers(F) == false) { - DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n"); - ++NumRejectedSRETUses; - return 0; - } - - DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n"); - ++NumSRET; - // [1] Replace use of sret parameter - AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", - F->getEntryBlock().begin()); - Value *NFirstArg = F->arg_begin(); - NFirstArg->replaceAllUsesWith(TheAlloca); - - // [2] Find and replace ret instructions - for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - for(BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { - Instruction *I = BI; - ++BI; - if (isa<ReturnInst>(I)) { - Value *NV = new LoadInst(TheAlloca, "mrv.ld", I); - ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I); - I->replaceAllUsesWith(NR); - I->eraseFromParent(); - } - } - - // [3] Create the new function body and insert it into the module. 
- Function *NF = cloneFunctionBody(F, STy); - - // [4] Update all call sites to use new function - CallGraphNode *NF_CFN = updateCallSites(F, NF); - - CallGraph &CG = getAnalysis<CallGraph>(); - NF_CFN->stealCalledFunctionsFrom(CG[F]); - - delete CG.removeFunctionFromModule(F); - return NF_CFN; -} - -// Check if it is ok to perform this promotion. -bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { - - if (F->use_empty()) - // No users. OK to modify signature. - return true; - - for (Value::use_iterator FnUseI = F->use_begin(), FnUseE = F->use_end(); - FnUseI != FnUseE; ++FnUseI) { - // The function is passed in as an argument to (possibly) another function, - // we can't change it! - CallSite CS(*FnUseI); - Instruction *Call = CS.getInstruction(); - // The function is used by something else than a call or invoke instruction, - // we can't change it! - if (!Call || !CS.isCallee(FnUseI)) - return false; - CallSite::arg_iterator AI = CS.arg_begin(); - Value *FirstArg = *AI; - - if (!isa<AllocaInst>(FirstArg)) - return false; - - // Check FirstArg's users. - for (Value::use_iterator ArgI = FirstArg->use_begin(), - ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) { - User *U = *ArgI; - // If FirstArg user is a CallInst that does not correspond to current - // call site then this function F is not suitable for sret promotion. - if (CallInst *CI = dyn_cast<CallInst>(U)) { - if (CI != Call) - return false; - } - // If FirstArg user is a GEP whose all users are not LoadInst then - // this function F is not suitable for sret promotion. - else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { - // TODO : Use dom info and insert PHINodes to collect get results - // from multiple call sites for this GEP. - if (GEP->getParent() != Call->getParent()) - return false; - for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end(); - GEPI != GEPE; ++GEPI) - if (!isa<LoadInst>(*GEPI)) - return false; - } - // Any other FirstArg users make this function unsuitable for sret - // promotion. - else - return false; - } - } - - return true; -} - -/// cloneFunctionBody - Create a new function based on F and -/// insert it into module. Remove first argument. Use STy as -/// the return type for new function. -Function *SRETPromotion::cloneFunctionBody(Function *F, - const StructType *STy) { - - const FunctionType *FTy = F->getFunctionType(); - std::vector<const Type*> Params; - - // Attributes - Keep track of the parameter attributes for the arguments. - SmallVector<AttributeWithIndex, 8> AttributesVec; - const AttrListPtr &PAL = F->getAttributes(); - - // Add any return attributes. - if (Attributes attrs = PAL.getRetAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); - - // Skip first argument. - Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - ++I; - // 0th parameter attribute is reserved for return type. - // 1th parameter attribute is for first 1st sret argument. - unsigned ParamIndex = 2; - while (I != E) { - Params.push_back(I->getType()); - if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) - AttributesVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); - ++I; - ++ParamIndex; - } - - // Add any fn attributes. 
- if (Attributes attrs = PAL.getFnAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); - - - FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg()); - Function *NF = Function::Create(NFTy, F->getLinkage()); - NF->takeName(F); - NF->copyAttributesFrom(F); - NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); - F->getParent()->getFunctionList().insert(F, NF); - NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - - // Replace arguments - I = F->arg_begin(); - E = F->arg_end(); - Function::arg_iterator NI = NF->arg_begin(); - ++I; - while (I != E) { - I->replaceAllUsesWith(NI); - NI->takeName(I); - ++I; - ++NI; - } - - return NF; -} - -/// updateCallSites - Update all sites that call F to use NF. -CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { - CallGraph &CG = getAnalysis<CallGraph>(); - SmallVector<Value*, 16> Args; - - // Attributes - Keep track of the parameter attributes for the arguments. - SmallVector<AttributeWithIndex, 8> ArgAttrsVec; - - // Get a new callgraph node for NF. - CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); - - while (!F->use_empty()) { - CallSite CS(*F->use_begin()); - Instruction *Call = CS.getInstruction(); - - const AttrListPtr &PAL = F->getAttributes(); - // Add any return attributes. - if (Attributes attrs = PAL.getRetAttributes()) - ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs)); - - // Copy arguments, however skip first one. - CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - Value *FirstCArg = *AI; - ++AI; - // 0th parameter attribute is reserved for return type. - // 1th parameter attribute is for first 1st sret argument. - unsigned ParamIndex = 2; - while (AI != AE) { - Args.push_back(*AI); - if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) - ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); - ++ParamIndex; - ++AI; - } - - // Add any function attributes. - if (Attributes attrs = PAL.getFnAttributes()) - ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs)); - - AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end()); - - // Build new call instruction. - Instruction *New; - if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); - cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); - cast<InvokeInst>(New)->setAttributes(NewPAL); - } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); - cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); - cast<CallInst>(New)->setAttributes(NewPAL); - if (cast<CallInst>(Call)->isTailCall()) - cast<CallInst>(New)->setTailCall(); - } - Args.clear(); - ArgAttrsVec.clear(); - New->takeName(Call); - - // Update the callgraph to know that the callsite has been transformed. - CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; - CalleeNode->removeCallEdgeFor(Call); - CalleeNode->addCalledFunction(New, NF_CGN); - - // Update all users of sret parameter to extract value using extractvalue. 
- for (Value::use_iterator UI = FirstCArg->use_begin(), - UE = FirstCArg->use_end(); UI != UE; ) { - User *U2 = *UI++; - CallInst *C2 = dyn_cast<CallInst>(U2); - if (C2 && (C2 == Call)) - continue; - - GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2); - ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2)); - Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), - "evi", UGEP); - while(!UGEP->use_empty()) { - // isSafeToUpdateAllCallers has checked that all GEP uses are - // LoadInsts - LoadInst *L = cast<LoadInst>(*UGEP->use_begin()); - L->replaceAllUsesWith(GR); - L->eraseFromParent(); - } - UGEP->eraseFromParent(); - continue; - } - Call->eraseFromParent(); - } - - return NF_CGN; -} - diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 9c2969c..9c70cf8 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -11,6 +11,7 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/IRBuilder.h" @@ -69,7 +70,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { TargetData *TD; - bool MustPreserveLCSSA; bool MadeIRChange; public: /// Worklist - All of the instructions that need to be simplified. @@ -217,8 +217,8 @@ private: Instruction *transformCallThroughTrampoline(CallSite CS); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); + Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); - DbgDeclareInst *hasOneUsePlusDeclare(Value *V); Value *EmitGEPOffset(User *GEP); public: @@ -247,7 +247,10 @@ public: // segment of unreachable code, so just clobber the instruction. if (&I == V) V = UndefValue::get(I.getType()); - + + DEBUG(errs() << "IC: Replacing " << I << "\n" + " with " << *V << '\n'); + I.replaceAllUsesWith(V); return &I; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 7986d1a..a08446e 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -14,6 +14,7 @@ #include "InstCombine.h" #include "llvm/Intrinsics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -330,7 +331,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is -/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient +/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient /// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. 
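The InsertRangeTest comment just above relies on the classic unsigned range-check reduction: (V >= Lo && V < Hi) becomes a single unsigned compare (V-Lo) <u (Hi-Lo). A small self-contained sketch of that equivalence for the unsigned case, assuming Lo <= Hi (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // "Lo <= V && V < Hi" as one unsigned compare. The subtraction wraps
    // modulo 2^32, so any V below Lo maps to a value >= Hi - Lo.
    bool inRange(uint32_t V, uint32_t Lo, uint32_t Hi) {
      return V - Lo < Hi - Lo;   // (V - Lo) <u (Hi - Lo)
    }

    int main() {
      for (uint32_t v = 0; v < 100; ++v)
        assert(inRange(v, 10, 20) == (v >= 10 && v < 20));
    }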
@@ -755,6 +756,54 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } + + // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0) + if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { + Value *NewAnd = Builder->CreateAnd(Val, Val2); + return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); + } + + // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1) + if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + } + + // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 + // where CMAX is the all ones value for the truncated type, + // iff the lower bits of C2 and CA are zero. + if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) && + LHS->hasOneUse() && RHS->hasOneUse()) { + Value *V; + ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; + + // (trunc x) == C1 & (and x, CA) == C2 + if (match(Val2, m_Trunc(m_Value(V))) && + match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = RHSCst; + BigCst = LHSCst; + } + // (and x, CA) == C2 & (trunc x) == C1 + else if (match(Val, m_Trunc(m_Value(V))) && + match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = LHSCst; + BigCst = RHSCst; + } + + if (SmallCst && BigCst) { + unsigned BigBitSize = BigCst->getType()->getBitWidth(); + unsigned SmallBitSize = SmallCst->getType()->getBitWidth(); + + // Check that the low bits are zero. + APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); + if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) { + Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue()); + APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue(); + Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N); + return Builder->CreateICmp(LHSCC, NewAnd, NewVal); + } + } } // From here on, we only handle: @@ -767,7 +816,17 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) return 0; - + + // Make a constant range that's the intersection of the two icmp ranges. + // If the intersection is empty, we know that the result is false. + ConstantRange LHSRange = + ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue()); + ConstantRange RHSRange = + ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue()); + + if (LHSRange.intersectWith(RHSRange).isEmptySet()) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + // We can't fold (ugt x, C) & (sgt x, C2). 
if (!PredicatesFoldable(LHSCC, RHSCC)) return 0; @@ -800,10 +859,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_EQ: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false - case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 @@ -851,9 +906,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_SLT: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false - case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 @@ -1438,6 +1490,18 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } + + // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0) + if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + + // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1) + if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { + Value *NewAnd = Builder->CreateAnd(Val, Val2); + return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); + } } // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) @@ -1975,7 +2039,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } } - + + // or(sext(A), B) -> A ? -1 : B where A is an i1 + // or(A, sext(B)) -> B ? -1 : A where B is an i1 + if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); + if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); + // Note: If we've gotten to the point of visiting the outer OR, then the // inner one couldn't be simplified. If it was a constant, then it won't // be simplified by a later pass either, so we try swapping the inner/outer diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0e46450..726105f 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -475,7 +475,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } break; - case Intrinsic::umul_with_overflow: + case Intrinsic::umul_with_overflow: { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + + // Get the largest possible values for each operand. 
+ APInt LHSMax = ~LHSKnownZero; + APInt RHSMax = ~RHSKnownZero; + + // If multiplying the maximum values does not overflow then we can turn + // this into a plain NUW mul. + bool Overflow; + LHSMax.umul_ov(RHSMax, Overflow); + if (!Overflow) { + Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow"); + Constant *V[] = { + UndefValue::get(LHS->getType()), + Builder->getFalse() + }; + Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + return InsertValueInst::Create(Struct, Mul, 0); + } + } // FALL THROUGH case Intrinsic::smul_with_overflow: // Canonicalize constants into the RHS. if (isa<Constant>(II->getArgOperand(0)) && @@ -508,11 +537,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: - case Intrinsic::x86_sse_loadu_ps: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse2_loadu_dq: - // Turn PPC lvx -> load if the pointer is known aligned. - // Turn X86 loadups -> load if the pointer is known aligned. + // Turn PPC lvx -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -731,9 +756,13 @@ protected: dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { if (SizeCI->isAllOnesValue()) return true; - if (isString) - return SizeCI->getZExtValue() >= - GetStringLength(CI->getArgOperand(SizeArgOp)); + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. + if (Len == 0) return false; + return SizeCI->getZExtValue() >= Len; + } if (ConstantInt *Arg = dyn_cast<ConstantInt>( CI->getArgOperand(SizeArgOp))) return SizeCI->getZExtValue() >= Arg->getZExtValue(); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index b432641..6f70de8 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -87,10 +87,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // If the allocation has multiple uses, only promote it if we are strictly // increasing the alignment of the resultant allocation. If we keep it the - // same, we open the door to infinite loops of various kinds. (A reference - // from a dbg.declare doesn't count as a use for this purpose.) - if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) && - CastElTyAlign == AllocElTyAlign) return 0; + // same, we open the door to infinite loops of various kinds. + if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); @@ -128,15 +126,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, New->setAlignment(AI.getAlignment()); New->takeName(&AI); - // If the allocation has one real use plus a dbg.declare, just remove the - // declare. - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) { - EraseInstFromFunction(*(Instruction*)DI); - } // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it // will die soon. - else if (!AI.hasOneUse()) { + if (!AI.hasOneUse()) { // New is the allocation instruction, pointer typed. AI is the original // allocation instruction, also pointer typed. 
Thus, cast to use is BitCast. Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); @@ -203,7 +196,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, } case Instruction::PHI: { PHINode *OPN = cast<PHINode>(I); - PHINode *NPN = PHINode::Create(Ty); + PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); NPN->addIncoming(V, OPN->getIncomingBlock(i)); @@ -883,6 +876,102 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { return 0; } +/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations +/// in order to eliminate the icmp. +Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { + Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1); + ICmpInst::Predicate Pred = ICI->getPredicate(); + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative + // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive + if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) || + (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) { + + Value *Sh = ConstantInt::get(Op0->getType(), + Op0->getType()->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); + if (In->getType() != CI.getType()) + In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp"); + + if (Pred == ICmpInst::ICMP_SGT) + In = Builder->CreateNot(In, In->getName()+".not"); + return ReplaceInstUsesWith(CI, In); + } + + // If we know that only one bit of the LHS of the icmp can be set and we + // have an equality comparison with zero or a power of 2, we can transform + // the icmp and sext into bitwise/integer operations. + if (ICI->hasOneUse() && + ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ + unsigned BitWidth = Op1C->getType()->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne); + + APInt KnownZeroMask(~KnownZero); + if (KnownZeroMask.isPowerOf2()) { + Value *In = ICI->getOperand(0); + + // If the icmp tests for a known zero bit we can constant fold it. + if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) { + Value *V = Pred == ICmpInst::ICMP_NE ? + ConstantInt::getAllOnesValue(CI.getType()) : + ConstantInt::getNullValue(CI.getType()); + return ReplaceInstUsesWith(CI, V); + } + + if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) { + // sext ((x & 2^n) == 0) -> (x >> n) - 1 + // sext ((x & 2^n) != 2^n) -> (x >> n) - 1 + unsigned ShiftAmt = KnownZeroMask.countTrailingZeros(); + // Perform a right shift to place the desired bit in the LSB. + if (ShiftAmt) + In = Builder->CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // At this point "In" is either 1 or 0. Subtract 1 to turn + // {1, 0} -> {0, -1}. + In = Builder->CreateAdd(In, + ConstantInt::getAllOnesValue(In->getType()), + "sext"); + } else { + // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 + // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 + unsigned ShiftAmt = KnownZeroMask.countLeadingZeros(); + // Perform a left shift to place the desired bit in the MSB. + if (ShiftAmt) + In = Builder->CreateShl(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // Distribute the bit over the whole bit width. 
+ In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), + BitWidth - 1), "sext"); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/); + } + } + } + + // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed. + if (const VectorType *VTy = dyn_cast<VectorType>(CI.getType())) { + if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) && + Op0->getType() == CI.getType()) { + const Type *EltTy = VTy->getElementType(); + + // splat the shift constant to a constant vector. + Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit"); + return ReplaceInstUsesWith(CI, In); + } + } + + return 0; +} + /// CanEvaluateSExtd - Return true if we can take the specified value /// and return it as type Ty without inserting any new casts and without /// changing the value of the common low bits. This is used by code that tries @@ -1006,44 +1095,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); return BinaryOperator::CreateAShr(Res, ShAmt); } - - - // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed - // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed - { - ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS; - if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) { - // sext (x <s 0) to i32 --> x>>s31 true if signbit set. - // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. - if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) || - (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) { - Value *Sh = ConstantInt::get(CmpLHS->getType(), - CmpLHS->getType()->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit"); - if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp"); - - if (Pred == ICmpInst::ICMP_SGT) - In = Builder->CreateNot(In, In->getName()+".not"); - return ReplaceInstUsesWith(CI, In); - } - } - } - // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed. - if (const VectorType *VTy = dyn_cast<VectorType>(DestTy)) { - ICmpInst::Predicate Pred; Value *CmpLHS; - if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) { - if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) { - const Type *EltTy = VTy->getElementType(); - - // splat the shift constant to a constant vector. - Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit"); - return ReplaceInstUsesWith(CI, In); - } - } - } + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) + return transformSExtICmp(ICI, CI); // If the input is a shl/ashr pair of a same constant, then this is a sign // extension from a smaller value. If we could trust arbitrary bitwidth diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 999de34..bb9b88b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -699,7 +699,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, - // so the values can never be equal. 
Similiarly for all other "or equals" + // so the values can never be equal. Similarly for all other "or equals" // operators. // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 @@ -1289,13 +1289,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) - case Instruction::AShr: - // Only handle equality comparisons of shift-by-constant. - if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) - if (Instruction *Res = FoldICmpShrCst(ICI, cast<BinaryOperator>(LHSI), - ShAmt)) + case Instruction::AShr: { + // Handle equality comparisons of shift-by-constant. + BinaryOperator *BO = cast<BinaryOperator>(LHSI); + if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { + if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt)) return Res; + } + + // Handle exact shr's. + if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) { + if (RHSV.isMinValue()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS); + } break; + } case Instruction::SDiv: case Instruction::UDiv: @@ -1376,9 +1384,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (Value *NegVal = dyn_castNegVal(BOp1)) return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0)) + if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); - else if (BO->hasOneUse()) { + if (BO->hasOneUse()) { Value *Neg = Builder->CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(ICI.getPredicate(), BOp0, Neg); @@ -1855,11 +1863,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_SLT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()+1)); case ICmpInst::ICMP_UGE: - assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE + assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_UGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); case ICmpInst::ICMP_SGE: - assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE + assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_SGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); } @@ -1907,18 +1915,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // that code below can assume that Min != Max. if (!isa<Constant>(Op0) && Op0Min == Op0Max) return new ICmpInst(I.getPredicate(), - ConstantInt::get(I.getContext(), Op0Min), Op1); + ConstantInt::get(Op0->getType(), Op0Min), Op1); if (!isa<Constant>(Op1) && Op1Min == Op1Max) return new ICmpInst(I.getPredicate(), Op0, - ConstantInt::get(I.getContext(), Op1Min)); + ConstantInt::get(Op1->getType(), Op1Min)); // Based on the range information we know about the LHS, see if we can - // simplify this comparison. For example, (x&4) < 8 is always true. + // simplify this comparison. For example, (x&4) < 8 is always true. switch (I.getPredicate()) { default: llvm_unreachable("Unknown icmp opcode!"); case ICmpInst::ICMP_EQ: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); // If all bits are known zero except for one, then we know at most one // bit is set. 
If the comparison is against zero, then this is a check @@ -1955,7 +1963,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } case ICmpInst::ICMP_NE: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); // If all bits are known zero except for one, then we know at most one // bit is set. If the comparison is against zero, then this is a check @@ -1992,9 +2000,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } case ICmpInst::ICMP_ULT: if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { @@ -2010,9 +2018,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_UGT: if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -2029,9 +2037,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_SLT: if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { @@ -2042,9 +2050,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_SGT: if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -2057,30 +2065,30 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_SGE: assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) - return 
ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_SLE: assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_UGE: assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_ULE: assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; } @@ -2306,6 +2314,35 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BO0->hasOneUse() && BO1->hasOneUse()) return new ICmpInst(Pred, D, B); + BinaryOperator *SRem = NULL; + // icmp (srem X, Y), Y + if (BO0 && BO0->getOpcode() == Instruction::SRem && + Op1 == BO0->getOperand(1)) + SRem = BO0; + // icmp Y, (srem X, Y) + else if (BO1 && BO1->getOpcode() == Instruction::SRem && + Op0 == BO1->getOperand(1)) + SRem = BO1; + if (SRem) { + // We don't check hasOneUse to avoid increasing register pressure because + // the value we use is the same value this instruction was already using. + switch (SRem == BO0 ? 
ICmpInst::getSwappedPredicate(Pred) : Pred) { + default: break; + case ICmpInst::ICMP_EQ: + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + case ICmpInst::ICMP_NE: + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), + Constant::getAllOnesValue(SRem->getType())); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), + Constant::getNullValue(SRem->getType())); + } + } + if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() && BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) { @@ -2356,6 +2393,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } break; + case Instruction::UDiv: + case Instruction::LShr: + if (I.isSigned()) + break; + // fall-through + case Instruction::SDiv: + case Instruction::AShr: + if (!BO0->isExact() && !BO1->isExact()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + case Instruction::Shl: { + bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); + bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); + if (!NUW && !NSW) + break; + if (!NSW && I.isSigned()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + } } } } @@ -2425,9 +2483,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (Op0->hasOneUse() && Op1->hasOneUse() && - match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { + if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && + match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { Value *X = 0, *Y = 0, *Z = 0; if (A == C) { @@ -2448,6 +2505,32 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return &I; } } + + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to + // "icmp (and X, mask), cst" + uint64_t ShAmt = 0; + ConstantInt *Cst1; + if (Op0->hasOneUse() && + match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), + m_ConstantInt(ShAmt))))) && + match(Op1, m_ConstantInt(Cst1)) && + // Only do this when A has multiple uses. This is most important to do + // when it exposes other optimizations. + !A->hasOneUse()) { + unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); + + if (ShAmt < ASize) { + APInt MaskV = + APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); + MaskV <<= ShAmt; + + APInt CmpV = Cst1->getValue().zext(ASize); + CmpV <<= ShAmt; + + Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); + return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); + } + } } { @@ -2704,6 +2787,42 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { if (Constant *RHSC = dyn_cast<Constant>(Op1)) { if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) switch (LHSI->getOpcode()) { + case Instruction::FPExt: { + // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless + FPExtInst *LHSExt = cast<FPExtInst>(LHSI); + ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC); + if (!RHSF) + break; + + // We can't convert a PPC double double. + if (RHSF->getType()->isPPC_FP128Ty()) + break; + + const fltSemantics *Sem; + // FIXME: This shouldn't be here. 
+ if (LHSExt->getSrcTy()->isFloatTy()) + Sem = &APFloat::IEEEsingle; + else if (LHSExt->getSrcTy()->isDoubleTy()) + Sem = &APFloat::IEEEdouble; + else if (LHSExt->getSrcTy()->isFP128Ty()) + Sem = &APFloat::IEEEquad; + else if (LHSExt->getSrcTy()->isX86_FP80Ty()) + Sem = &APFloat::x87DoubleExtended; + else + break; + + bool Lossy; + APFloat F = RHSF->getValueAPF(); + F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); + + // Avoid lossy conversions and denormals. + if (!Lossy && + F.compare(APFloat::getSmallestNormalized(*Sem)) != + APFloat::cmpLessThan) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + ConstantFP::get(RHSC->getContext(), F)); + break; + } case Instruction::PHI: // Only fold fcmp into the PHI if the phi and fcmp are in the same // block. If in the same block, we're encouraging jump threading. If @@ -2742,6 +2861,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } + case Instruction::FSub: { + // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C + Value *Op; + if (match(LHSI, m_FNeg(m_Value(Op)))) + return new FCmpInst(I.getSwappedPredicate(), Op, + ConstantExpr::getFNeg(RHSC)); + break; + } case Instruction::Load: if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { @@ -2755,5 +2882,17 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { } } + // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y + Value *X, *Y; + if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) + return new FCmpInst(I.getSwappedPredicate(), X, Y); + + // fcmp (fpext x), (fpext y) -> fcmp x, y + if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0)) + if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1)) + if (LHSExt->getSrcTy() == RHSExt->getSrcTy()) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + RHSExt->getOperand(0)); + return Changed ? &I : 0; } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 78ff734..432adc9 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -364,34 +364,12 @@ static bool equivalentAddressValues(Value *A, Value *B) { return false; } -// If this instruction has two uses, one of which is a llvm.dbg.declare, -// return the llvm.dbg.declare. -DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { - if (!V->hasNUses(2)) - return 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - User *U = *UI; - if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U)) - return DI; - if (isa<BitCastInst>(U) && U->hasOneUse()) { - if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(*U->use_begin())) - return DI; - } - } - return 0; -} - Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. - // If the RHS is an alloca with a two uses, the other one being a - // llvm.dbg.declare, zapify the store and the declare, making the - // alloca dead. We must do this to prevent declares from affecting - // codegen. 
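The FPExt case added to visitFCmpInst above only narrows the compare when the constant survives the round trip to the source type unchanged (and it additionally steers clear of denormals). A minimal standalone illustration of why the losslessness check matters, using float/double rather than APFloat:

#include <cassert>
int main() {
  float x = 0.1f;
  // 0.5 is exactly representable as float, so the narrowed compare agrees
  // with the widened one for every float x:
  assert(((double)x < 0.5) == (x < 0.5f));
  // 0.1 is not exactly representable as float; naively narrowing the
  // constant changes the answer here:
  assert(((double)x > 0.1) != (x > 0.1f));
  return 0;
}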
if (!SI.isVolatile()) { if (Ptr->hasOneUse()) { if (isa<AllocaInst>(Ptr)) @@ -400,17 +378,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (isa<AllocaInst>(GEP->getOperand(0))) { if (GEP->getOperand(0)->hasOneUse()) return EraseInstFromFunction(SI); - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) { - EraseInstFromFunction(*DI); - return EraseInstFromFunction(SI); - } } } } - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) { - EraseInstFromFunction(*DI); - return EraseInstFromFunction(SI); - } } // Attempt to improve the alignment. @@ -621,8 +591,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { - PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge"); - PN->reserveOperandSpace(2); + PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); PN->addIncoming(SI.getOperand(0), SI.getParent()); PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index d1a1fd6..57fb08a 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -320,6 +320,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } + // See if we can fold away this div instruction. + if (SimplifyDemandedInstructionBits(I)) + return &I; + // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y Value *X = 0, *Z = 0; if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 @@ -332,6 +336,19 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return 0; } +/// dyn_castZExtVal - Checks if V is a zext or constant that can +/// be truncated to Ty without losing bits. +static Value *dyn_castZExtVal(Value *V, const Type *Ty) { + if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) { + if (Z->getSrcTy() == Ty) + return Z->getOperand(0); + } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) { + if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth()) + return ConstantExpr::getTrunc(C, Ty); + } + return 0; +} + Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -390,6 +407,14 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { return SelectInst::Create(Cond, TSI, FSI); } } + + // (zext A) udiv (zext B) --> zext (A udiv B) + if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", + I.isExact()), + I.getType()); + return 0; } @@ -452,27 +477,17 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Value *V = SimplifyFDivInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - return 0; -} + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { + const APFloat &Op1F = Op1C->getValueAPF(); -/// This function implements the transforms on rem instructions that work -/// regardless of the kind of rem instruction it is (urem, srem, or frem). It -/// is used by the visitors to those instructions. 
-/// @brief Transforms common to all three rem instructions -Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (isa<UndefValue>(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVectorTy()) - return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // If the divisor has an exact multiplicative inverse we can turn the fdiv + // into a cheaper fmul. + APFloat Reciprocal(Op1F.getSemantics()); + if (Op1F.getExactInverse(&Reciprocal)) { + ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal); + return BinaryOperator::CreateFMul(Op0, RFP); + } } - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); // X % undef -> undef - - // Handle cases involving: rem X, (select Cond, Y, Z) - if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) - return &I; return 0; } @@ -484,26 +499,11 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Instruction *common = commonRemTransforms(I)) - return common; - - // X % X == 0 - if (Op0 == Op1) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // 0 % X == 0 for integer, we don't need to preserve faults! - if (Constant *LHS = dyn_cast<Constant>(Op0)) - if (LHS->isNullValue()) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // X % 0 == undef, we don't need to preserve faults! 
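The new code in visitFDiv above turns a divide by a constant into a multiply only when APFloat::getExactInverse succeeds, i.e. when the reciprocal is itself exactly representable (powers of two, for example), so the fmul result is bit-identical to the fdiv. A small standalone check of the underlying identity:

#include <cassert>
int main() {
  double x = 3.141592653589793;
  // 1/4 is exact in binary floating point: dividing by 4.0 and multiplying
  // by 0.25 round to the same value for any finite x.
  assert(x / 4.0 == x * 0.25);
  // 1/3 is not exact, so x / 3.0 and x * (1.0 / 3.0) may differ in the
  // last bit; the fold deliberately skips such divisors.
  return 0;
}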
- if (RHS->equalsInt(0)) - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - - if (RHS->equalsInt(1)) // X % 1 == 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) { if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) { if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -525,6 +525,9 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyURemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *common = commonIRemTransforms(I)) return common; @@ -552,13 +555,22 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { return SelectInst::Create(Cond, TrueAnd, FalseAnd); } } - + + // (zext A) urem (zext B) --> zext (A urem B) + if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1), + I.getType()); + return 0; } Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifySRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer rem common cases if (Instruction *Common = commonIRemTransforms(I)) return Common; @@ -617,6 +629,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { } Instruction *InstCombiner::visitFRem(BinaryOperator &I) { - return commonRemTransforms(I); -} + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyFRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + return 0; +} diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 297a18c..abf61bb 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -80,18 +80,16 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { Value *InRHS = FirstInst->getOperand(1); PHINode *NewLHS = 0, *NewRHS = 0; if (LHSVal == 0) { - NewLHS = PHINode::Create(LHSType, + NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), FirstInst->getOperand(0)->getName() + ".pn"); - NewLHS->reserveOperandSpace(PN.getNumOperands()/2); NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewLHS, PN); LHSVal = NewLHS; } if (RHSVal == 0) { - NewRHS = PHINode::Create(RHSType, + NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); - NewRHS->reserveOperandSpace(PN.getNumOperands()/2); NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewRHS, PN); RHSVal = NewRHS; @@ -202,11 +200,10 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { if (FixedOperands[i]) continue; // operand doesn't need a phi. 
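The (zext A) udiv (zext B) and (zext A) urem (zext B) folds above, with dyn_castZExtVal also accepting a constant divisor that fits in the narrow type, rely on unsigned division and remainder never producing more bits than their operands. A quick standalone sanity check:

#include <cassert>
#include <cstdint>
int main() {
  uint8_t a = 200, b = 7;
  // Widening first, or operating narrow and widening the result,
  // agree for unsigned operands.
  assert((uint32_t)a / (uint32_t)b == (uint32_t)(uint8_t)(a / b));
  assert((uint32_t)a % (uint32_t)b == (uint32_t)(uint8_t)(a % b));
  return 0;
}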
Value *FirstOp = FirstInst->getOperand(i); - PHINode *NewPN = PHINode::Create(FirstOp->getType(), + PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, FirstOp->getName()+".pn"); InsertNewInstBefore(NewPN, PN); - NewPN->reserveOperandSpace(e); NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); OperandPhis[i] = NewPN; FixedOperands[i] = NewPN; @@ -240,7 +237,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { /// obvious the value of the load is not changed from the point of the load to /// the end of the block it is in. /// -/// Finally, it is safe, but not profitable, to sink a load targetting a +/// Finally, it is safe, but not profitable, to sink a load targeting a /// non-address-taken alloca. Doing so will cause us to not promote the alloca /// to a register. static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { @@ -340,8 +337,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), + PN.getNumIncomingValues(), PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); @@ -446,8 +443,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), + PN.getNumIncomingValues(), PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); Value *InVal = FirstInst->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); @@ -699,7 +696,8 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { // Otherwise, Create the new PHI node for this user. - EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN); + EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), + PN->getName()+".off"+Twine(Offset), PN); assert(EltPHI->getType() != PN->getType() && "Truncate didn't shrink phi?"); @@ -776,9 +774,6 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - // If LCSSA is around, don't mess with Phi nodes - if (MustPreserveLCSSA) return 0; - if (Value *V = SimplifyInstruction(&PN, TD)) return ReplaceInstUsesWith(PN, V); @@ -826,18 +821,18 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // quick check to see if the PHI node only contains a single non-phi value, if // so, scan to see if the phi cycle is actually equal to that value. { - unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); + unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues(); // Scan for the first non-phi operand. - while (InValNo != NumOperandVals && + while (InValNo != NumIncomingVals && isa<PHINode>(PN.getIncomingValue(InValNo))) ++InValNo; - if (InValNo != NumOperandVals) { - Value *NonPhiInVal = PN.getOperand(InValNo); + if (InValNo != NumIncomingVals) { + Value *NonPhiInVal = PN.getIncomingValue(InValNo); // Scan the rest of the operands to see if there are any conflicts, if so // there is no need to recursively scan other phis. 
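Several hunks above switch PHINode::Create to the overload that takes the expected number of incoming values, replacing the separate reserveOperandSpace call so the operand list is allocated once. A hedged sketch of the updated idiom (the helper name and arguments are illustrative, not from the patch):

#include "llvm/Instructions.h"
using namespace llvm;

// Build a PHI for NumPreds incoming values, reserving space up front.
static PHINode *makeMergePHI(const Type *Ty, unsigned NumPreds,
                             Value *const *Vals, BasicBlock *const *Preds,
                             Instruction *InsertBefore) {
  PHINode *PN = PHINode::Create(Ty, NumPreds, "merge", InsertBefore);
  for (unsigned i = 0; i != NumPreds; ++i)
    PN->addIncoming(Vals[i], Preds[i]);
  return PN;
}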
- for (++InValNo; InValNo != NumOperandVals; ++InValNo) { + for (++InValNo; InValNo != NumIncomingVals; ++InValNo) { Value *OpVal = PN.getIncomingValue(InValNo); if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) break; @@ -846,7 +841,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // If we scanned over all operands, then we have one unique value plus // phi values. Scan PHI nodes to see if they all merge in each other or // the value. - if (InValNo == NumOperandVals) { + if (InValNo == NumIncomingVals) { SmallPtrSet<PHINode*, 16> ValueEqualPHIs; if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) return ReplaceInstUsesWith(PN, NonPhiInVal); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 97abc76..61a433a 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -214,7 +214,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, unsigned OpToFold = 0; if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { OpToFold = 1; - } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { + } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { OpToFold = 2; } @@ -227,9 +227,16 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); InsertNewInstBefore(NewSel, SI); NewSel->takeName(TVI); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI)) - return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI); + BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(), + FalseVal, NewSel); + if (isa<PossiblyExactOperator>(BO)) + BO->setIsExact(TVI_BO->isExact()); + if (isa<OverflowingBinaryOperator>(BO)) { + BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap()); + } + return BO; } } } @@ -243,7 +250,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, unsigned OpToFold = 0; if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { OpToFold = 1; - } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { + } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { OpToFold = 2; } @@ -256,9 +263,16 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); InsertNewInstBefore(NewSel, SI); NewSel->takeName(FVI); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI)) - return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI); + BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(), + TrueVal, NewSel); + if (isa<PossiblyExactOperator>(BO)) + BO->setIsExact(FVI_BO->isExact()); + if (isa<OverflowingBinaryOperator>(BO)) { + BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap()); + } + return BO; } } } @@ -424,6 +438,19 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, return ReplaceInstUsesWith(SI, TrueVal); /// NOTE: if we wanted to, this is where to detect integer MIN/MAX } + + if (isa<Constant>(CmpRHS)) { + if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { + // Transform (X == C) ? X : Y -> (X == C) ? 
C : Y + SI.setOperand(1, CmpRHS); + Changed = true; + } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) { + // Transform (X != C) ? Y : X -> (X != C) ? Y : C + SI.setOperand(2, CmpRHS); + Changed = true; + } + } + return Changed ? &SI : 0; } @@ -503,9 +530,8 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, if (!IC || !IC->isEquality()) return 0; - if (ConstantInt *C = dyn_cast<ConstantInt>(IC->getOperand(1))) - if (!C->isZero()) - return 0; + if (!match(IC->getOperand(1), m_Zero())) + return 0; ConstantInt *AndRHS; Value *LHS = IC->getOperand(0); diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index a7f8005..811f949 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -644,7 +644,14 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { return &I; } } - + + // (C1 << A) << C2 -> (C1 << C2) << A + Constant *C1, *C2; + Value *A; + if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) && + match(I.getOperand(1), m_Constant(C2))) + return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index bda8cea..6e727ce 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -684,6 +684,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::SRem: if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + // X % -1 demands all the bits because we don't want to introduce + // INT_MIN % -1 (== undef) by accident. + if (Rem->isAllOnesValue()) + break; APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { if (DemandedMask.ult(RA)) // srem won't affect demanded bits @@ -712,6 +716,18 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); } } + + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. + if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { + APInt Mask2 = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, + Depth+1); + // If it's known zero, our sign bit is also zero. 
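The SimplifyDemandedUseBits changes above lean on the definition of srem: the result is either zero or takes the sign of the dividend, never the divisor, which is why a non-negative LHS forces the result's sign bit to zero (and why X % -1 is left untouched, to avoid materializing INT_MIN % -1). The same convention holds for C++11 integer remainder:

#include <cassert>
int main() {
  assert(7 % -3 == 1);   // non-negative dividend -> non-negative remainder
  assert(-7 % 3 == -1);  // negative dividend -> non-positive remainder
  return 0;
}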
+ if (LHSKnownZero.isNegative()) + KnownZero |= LHSKnownZero; + } break; case Instruction::URem: { APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 5caa12d..ad6a8d0 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -230,8 +230,16 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { ConstantInt::get(Int32Ty, SrcIdx, false)); } + } else if (CastInst *CI = dyn_cast<CastInst>(I)) { + // Canonicalize extractelement(cast) -> cast(extractelement) + // bitcasts can change the number of vector elements and they cost nothing + if (CI->hasOneUse() && EI.hasOneUse() && + (CI->getOpcode() != Instruction::BitCast)) { + Value *EE = Builder->CreateExtractElement(CI->getOperand(0), + EI.getIndexOperand()); + return CastInst::Create(CI->getOpcode(), EE, EI.getType()); + } } - // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) } return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 9100a85..32009c3 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -53,6 +53,7 @@ public: void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { assert(Worklist.empty() && "Worklist must be empty to add initial group"); Worklist.reserve(NumEntries+16); + WorklistMap.resize(NumEntries); DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); for (; NumEntries; --NumEntries) { Instruction *I = List[NumEntries-1]; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 37123d0..7a84598 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -76,7 +76,6 @@ INITIALIZE_PASS(InstCombiner, "instcombine", "Combine redundant instructions", false, false) void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreservedID(LCSSAID); AU.setPreservesCFG(); } @@ -600,8 +599,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } // Okay, we can do the transformation: create the new PHI node. - PHINode *NewPN = PHINode::Create(I.getType(), ""); - NewPN->reserveOperandSpace(PN->getNumOperands()/2); + PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues(), ""); InsertNewInstBefore(NewPN, *PN); NewPN->takeName(PN); @@ -850,22 +848,23 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), Indices.end(), GEP.getName()); } - + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); - if (StrippedPtr != PtrOp) { - const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + if (StrippedPtr != PtrOp && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { bool HasZeroPointerIndex = false; if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1))) HasZeroPointerIndex = C->isZero(); - + // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... // into : GEP [10 x i8]* X, i32 0, ... // // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... // into : GEP i8* X, ... 
- // + // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); @@ -976,7 +975,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } } - + /// See if we can simplify: /// X = bitcast A* to B* /// Y = gep X, <...constant indices...> @@ -984,12 +983,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { if (TD && - !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { + !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { + // Determine how much the GEP moves the pointer. We are guaranteed to get // a constant back from EmitGEPOffset. ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP)); int64_t Offset = OffsetV->getSExtValue(); - + // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. if (Offset == 0) { @@ -1635,7 +1636,6 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { bool InstCombiner::runOnFunction(Function &F) { - MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); TD = getAnalysisIfAvailable<TargetData>(); @@ -1648,6 +1648,10 @@ bool InstCombiner::runOnFunction(Function &F) { bool EverMadeChange = false; + // Lower dbg.declare intrinsics otherwise their value may be clobbered + // by instcombiner. + EverMadeChange = LowerDbgDeclare(F); + // Iterate while there is work to do. unsigned Iteration = 0; while (DoOneIteration(F, Iteration++)) diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 0ac1cb0..5700ac8 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMInstrumentation EdgeProfiling.cpp + GCOVProfiling.cpp Instrumentation.cpp OptimalEdgeProfiling.cpp PathProfiling.cpp diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp new file mode 100644 index 0000000..2425342 --- /dev/null +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -0,0 +1,638 @@ +//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements GCOV-style profiling. When this pass is run it emits +// "gcno" files next to the existing source, and instruments the code that runs +// to records the edges between blocks that run and emit a complementary "gcda" +// file on exit. 
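To use the new pass, a driver schedules it like any other instrumentation pass; a rough sketch, assuming createGCOVProfilerPass is declared alongside the other factories in llvm/Transforms/Instrumentation.h:

#include "llvm/PassManager.h"
#include "llvm/Transforms/Instrumentation.h"

// Emit .gcno files and instrument the module so .gcda counters are
// written out when the program exits.
void addGCOVProfiling(llvm::PassManager &PM) {
  PM.add(llvm::createGCOVProfilerPass(/*EmitNotes=*/true, /*EmitData=*/true));
}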
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "insert-gcov-profiling" + +#include "ProfilingUtils.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Instructions.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/PathV2.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/UniqueVector.h" +#include <string> +#include <utility> +using namespace llvm; + +namespace { + class GCOVProfiler : public ModulePass { + bool runOnModule(Module &M); + public: + static char ID; + GCOVProfiler() + : ModulePass(ID), EmitNotes(true), EmitData(true) { + initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + } + GCOVProfiler(bool EmitNotes, bool EmitData) + : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData) { + assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); + initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "GCOV Profiler"; + } + + private: + // Create the GCNO files for the Module based on DebugInfo. + void emitGCNO(DebugInfoFinder &DIF); + + // Modify the program to track transitions along edges and call into the + // profiling runtime to emit .gcda files when run. + bool emitProfileArcs(DebugInfoFinder &DIF); + + // Get pointers to the functions in the runtime library. + Constant *getStartFileFunc(); + Constant *getIncrementIndirectCounterFunc(); + Constant *getEmitFunctionFunc(); + Constant *getEmitArcsFunc(); + Constant *getEndFileFunc(); + + // Create or retrieve an i32 state value that is used to represent the + // pred block number for certain non-trivial edges. + GlobalVariable *getEdgeStateValue(); + + // Produce a table of pointers to counters, by predecessor and successor + // block number. + GlobalVariable *buildEdgeLookupTable(Function *F, + GlobalVariable *Counter, + const UniqueVector<BasicBlock *> &Preds, + const UniqueVector<BasicBlock *> &Succs); + + // Add the function to write out all our counters to the global destructor + // list. 
+ void insertCounterWriteout(DebugInfoFinder &, + SmallVector<std::pair<GlobalVariable *, + uint32_t>, 8> &); + + bool EmitNotes; + bool EmitData; + + Module *M; + LLVMContext *Ctx; + }; +} + +char GCOVProfiler::ID = 0; +INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", + "Insert instrumentation for GCOV profiling", false, false) + +ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData) { + return new GCOVProfiler(EmitNotes, EmitData); +} + +static DISubprogram findSubprogram(DIScope Scope) { + while (!Scope.isSubprogram()) { + assert(Scope.isLexicalBlock() && + "Debug location not lexical block or subprogram"); + Scope = DILexicalBlock(Scope).getContext(); + } + return DISubprogram(Scope); +} + +namespace { + class GCOVRecord { + protected: + static const char *LinesTag; + static const char *FunctionTag; + static const char *BlockTag; + static const char *EdgeTag; + + GCOVRecord() {} + + void writeBytes(const char *Bytes, int Size) { + os->write(Bytes, Size); + } + + void write(uint32_t i) { + writeBytes(reinterpret_cast<char*>(&i), 4); + } + + // Returns the length measured in 4-byte blocks that will be used to + // represent this string in a GCOV file + unsigned lengthOfGCOVString(StringRef s) { + // A GCOV string is a length, followed by a NUL, then between 0 and 3 NULs + // padding out to the next 4-byte word. The length is measured in 4-byte + // words including padding, not bytes of actual string. + return (s.size() + 5) / 4; + } + + void writeGCOVString(StringRef s) { + uint32_t Len = lengthOfGCOVString(s); + write(Len); + writeBytes(s.data(), s.size()); + + // Write 1 to 4 bytes of NUL padding. + assert((unsigned)(4 - (s.size() % 4)) > 0); + assert((unsigned)(4 - (s.size() % 4)) <= 4); + writeBytes("\0\0\0\0", 4 - (s.size() % 4)); + } + + raw_ostream *os; + }; + const char *GCOVRecord::LinesTag = "\0\0\x45\x01"; + const char *GCOVRecord::FunctionTag = "\0\0\0\1"; + const char *GCOVRecord::BlockTag = "\0\0\x41\x01"; + const char *GCOVRecord::EdgeTag = "\0\0\x43\x01"; + + class GCOVFunction; + class GCOVBlock; + + // Constructed only by requesting it from a GCOVBlock, this object stores a + // list of line numbers and a single filename, representing lines that belong + // to the block. + class GCOVLines : public GCOVRecord { + public: + void addLine(uint32_t Line) { + Lines.push_back(Line); + } + + uint32_t length() { + return lengthOfGCOVString(Filename) + 2 + Lines.size(); + } + + private: + friend class GCOVBlock; + + GCOVLines(std::string Filename, raw_ostream *os) + : Filename(Filename) { + this->os = os; + } + + std::string Filename; + SmallVector<uint32_t, 32> Lines; + }; + + // Represent a basic block in GCOV. Each block has a unique number in the + // function, number of lines belonging to each block, and a set of edges to + // other blocks. 
+ class GCOVBlock : public GCOVRecord { + public: + GCOVLines &getFile(std::string Filename) { + GCOVLines *&Lines = LinesByFile[Filename]; + if (!Lines) { + Lines = new GCOVLines(Filename, os); + } + return *Lines; + } + + void addEdge(GCOVBlock &Successor) { + OutEdges.push_back(&Successor); + } + + void writeOut() { + uint32_t Len = 3; + for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(), + E = LinesByFile.end(); I != E; ++I) { + Len += I->second->length(); + } + + writeBytes(LinesTag, 4); + write(Len); + write(Number); + for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(), + E = LinesByFile.end(); I != E; ++I) { + write(0); + writeGCOVString(I->second->Filename); + for (int i = 0, e = I->second->Lines.size(); i != e; ++i) { + write(I->second->Lines[i]); + } + } + write(0); + write(0); + } + + ~GCOVBlock() { + DeleteContainerSeconds(LinesByFile); + } + + private: + friend class GCOVFunction; + + GCOVBlock(uint32_t Number, raw_ostream *os) + : Number(Number) { + this->os = os; + } + + uint32_t Number; + StringMap<GCOVLines *> LinesByFile; + SmallVector<GCOVBlock *, 4> OutEdges; + }; + + // A function has a unique identifier, a checksum (we leave as zero) and a + // set of blocks and a map of edges between blocks. This is the only GCOV + // object users can construct, the blocks and lines will be rooted here. + class GCOVFunction : public GCOVRecord { + public: + GCOVFunction(DISubprogram SP, raw_ostream *os) { + this->os = os; + + Function *F = SP.getFunction(); + uint32_t i = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + Blocks[BB] = new GCOVBlock(i++, os); + } + ReturnBlock = new GCOVBlock(i++, os); + + writeBytes(FunctionTag, 4); + uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + + 1 + lengthOfGCOVString(SP.getFilename()) + 1; + write(BlockLen); + uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP); + write(Ident); + write(0); // checksum + writeGCOVString(SP.getName()); + writeGCOVString(SP.getFilename()); + write(SP.getLineNumber()); + } + + ~GCOVFunction() { + DeleteContainerSeconds(Blocks); + delete ReturnBlock; + } + + GCOVBlock &getBlock(BasicBlock *BB) { + return *Blocks[BB]; + } + + GCOVBlock &getReturnBlock() { + return *ReturnBlock; + } + + void writeOut() { + // Emit count of blocks. + writeBytes(BlockTag, 4); + write(Blocks.size() + 1); + for (int i = 0, e = Blocks.size() + 1; i != e; ++i) { + write(0); // No flags on our blocks. + } + + // Emit edges between blocks. + for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(), + E = Blocks.end(); I != E; ++I) { + GCOVBlock &Block = *I->second; + if (Block.OutEdges.empty()) continue; + + writeBytes(EdgeTag, 4); + write(Block.OutEdges.size() * 2 + 1); + write(Block.Number); + for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) { + write(Block.OutEdges[i]->Number); + write(0); // no flags + } + } + + // Emit lines for each block. + for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(), + E = Blocks.end(); I != E; ++I) { + I->second->writeOut(); + } + } + + private: + DenseMap<BasicBlock *, GCOVBlock *> Blocks; + GCOVBlock *ReturnBlock; + }; +} + +// Replace the stem of a file, or add one if missing. +static std::string replaceStem(std::string OrigFilename, std::string NewStem) { + return (sys::path::stem(OrigFilename) + "." 
+ NewStem).str(); +} + +bool GCOVProfiler::runOnModule(Module &M) { + this->M = &M; + Ctx = &M.getContext(); + + DebugInfoFinder DIF; + DIF.processModule(M); + + if (EmitNotes) emitGCNO(DIF); + if (EmitData) return emitProfileArcs(DIF); + return false; +} + +void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) { + DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles; + for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(), + E = DIF.compile_unit_end(); I != E; ++I) { + // Each compile unit gets its own .gcno file. This means that whether we run + // this pass over the original .o's as they're produced, or run it after + // LTO, we'll generate the same .gcno files. + + DICompileUnit CU(*I); + raw_fd_ostream *&out = GcnoFiles[CU]; + std::string ErrorInfo; + out = new raw_fd_ostream(replaceStem(CU.getFilename(), "gcno").c_str(), + ErrorInfo, raw_fd_ostream::F_Binary); + out->write("oncg*404MVLL", 12); + } + + for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(), + SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) { + DISubprogram SP(*SPI); + raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()]; + + Function *F = SP.getFunction(); + if (!F) continue; + GCOVFunction Func(SP, os); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + GCOVBlock &Block = Func.getBlock(BB); + TerminatorInst *TI = BB->getTerminator(); + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + Block.addEdge(Func.getBlock(TI->getSuccessor(i))); + } + } else if (isa<ReturnInst>(TI)) { + Block.addEdge(Func.getReturnBlock()); + } + + uint32_t Line = 0; + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Line == Loc.getLine()) continue; + Line = Loc.getLine(); + if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue; + + GCOVLines &Lines = Block.getFile(SP.getFilename()); + Lines.addLine(Loc.getLine()); + } + } + Func.writeOut(); + } + + for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator + I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) { + raw_fd_ostream *&out = I->second; + out->write("\0\0\0\0\0\0\0\0", 8); // EOF + out->close(); + delete out; + } +} + +bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) { + if (DIF.subprogram_begin() == DIF.subprogram_end()) + return false; + + SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> CountersByIdent; + for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(), + SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) { + DISubprogram SP(*SPI); + Function *F = SP.getFunction(); + if (!F) continue; + + unsigned Edges = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + if (isa<ReturnInst>(TI)) + ++Edges; + else + Edges += TI->getNumSuccessors(); + } + + const ArrayType *CounterTy = + ArrayType::get(Type::getInt64Ty(*Ctx), Edges); + GlobalVariable *Counters = + new GlobalVariable(*M, CounterTy, false, + GlobalValue::InternalLinkage, + Constant::getNullValue(CounterTy), + "__llvm_gcov_ctr", 0, false, 0); + CountersByIdent.push_back( + std::make_pair(Counters, reinterpret_cast<intptr_t>((MDNode*)SP))); + + UniqueVector<BasicBlock *> ComplexEdgePreds; + UniqueVector<BasicBlock *> ComplexEdgeSuccs; + + unsigned Edge = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + int Successors = isa<ReturnInst>(TI) ? 
1 : TI->getNumSuccessors(); + if (Successors) { + IRBuilder<> Builder(TI); + + if (Successors == 1) { + Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, + Edge); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + Value *Sel = Builder.CreateSelect( + BI->getCondition(), + ConstantInt::get(Type::getInt64Ty(*Ctx), Edge), + ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1)); + SmallVector<Value *, 2> Idx; + Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); + Idx.push_back(Sel); + Value *Counter = Builder.CreateInBoundsGEP(Counters, + Idx.begin(), Idx.end()); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else { + ComplexEdgePreds.insert(BB); + for (int i = 0; i != Successors; ++i) + ComplexEdgeSuccs.insert(TI->getSuccessor(i)); + } + Edge += Successors; + } + } + + if (!ComplexEdgePreds.empty()) { + GlobalVariable *EdgeTable = + buildEdgeLookupTable(F, Counters, + ComplexEdgePreds, ComplexEdgeSuccs); + GlobalVariable *EdgeState = getEdgeStateValue(); + + const Type *Int32Ty = Type::getInt32Ty(*Ctx); + for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { + IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); + Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); + } + for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { + // call runtime to perform increment + IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI()); + Value *CounterPtrArray = + Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, + i * ComplexEdgePreds.size()); + Builder.CreateCall2(getIncrementIndirectCounterFunc(), + EdgeState, CounterPtrArray); + // clear the predecessor number + Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState); + } + } + } + + insertCounterWriteout(DIF, CountersByIdent); + + return true; +} + +// All edges with successors that aren't branches are "complex", because it +// requires complex logic to pick which counter to update. +GlobalVariable *GCOVProfiler::buildEdgeLookupTable( + Function *F, + GlobalVariable *Counters, + const UniqueVector<BasicBlock *> &Preds, + const UniqueVector<BasicBlock *> &Succs) { + // TODO: support invoke, threads. We rely on the fact that nothing can modify + // the whole-Module pred edge# between the time we set it and the time we next + // read it. Threads and invoke make this untrue. + + // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]]. + const Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); + const ArrayType *EdgeTableTy = ArrayType::get( + Int64PtrTy, Succs.size() * Preds.size()); + + Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()]; + Constant *NullValue = Constant::getNullValue(Int64PtrTy); + for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i) + EdgeTable[i] = NullValue; + + unsigned Edge = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + int Successors = isa<ReturnInst>(TI) ? 
1 : TI->getNumSuccessors(); + if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) { + for (int i = 0; i != Successors; ++i) { + BasicBlock *Succ = TI->getSuccessor(i); + IRBuilder<> builder(Succ); + Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0, + Edge + i); + EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + + (Preds.idFor(BB)-1)] = cast<Constant>(Counter); + } + } + Edge += Successors; + } + + GlobalVariable *EdgeTableGV = + new GlobalVariable( + *M, EdgeTableTy, true, GlobalValue::InternalLinkage, + ConstantArray::get(EdgeTableTy, + &EdgeTable[0], Succs.size() * Preds.size()), + "__llvm_gcda_edge_table"); + EdgeTableGV->setUnnamedAddr(true); + return EdgeTableGV; +} + +Constant *GCOVProfiler::getStartFileFunc() { + const Type *Args[] = { Type::getInt8PtrTy(*Ctx) }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_start_file", FTy); +} + +Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { + const Type *Args[] = { + Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor + Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row + }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy); +} + +Constant *GCOVProfiler::getEmitFunctionFunc() { + const Type *Args[] = { Type::getInt32Ty(*Ctx) }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); +} + +Constant *GCOVProfiler::getEmitArcsFunc() { + const Type *Args[] = { + Type::getInt32Ty(*Ctx), // uint32_t num_counters + Type::getInt64PtrTy(*Ctx), // uint64_t *counters + }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); +} + +Constant *GCOVProfiler::getEndFileFunc() { + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_gcda_end_file", FTy); +} + +GlobalVariable *GCOVProfiler::getEdgeStateValue() { + GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred"); + if (!GV) { + GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false, + GlobalValue::InternalLinkage, + ConstantInt::get(Type::getInt32Ty(*Ctx), + 0xffffffff), + "__llvm_gcov_global_state_pred"); + GV->setUnnamedAddr(true); + } + return GV; +} + +void GCOVProfiler::insertCounterWriteout( + DebugInfoFinder &DIF, + SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> &CountersByIdent) { + const FunctionType *WriteoutFTy = + FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *WriteoutF = Function::Create(WriteoutFTy, + GlobalValue::InternalLinkage, + "__llvm_gcov_writeout", M); + WriteoutF->setUnnamedAddr(true); + BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF); + IRBuilder<> Builder(BB); + + Constant *StartFile = getStartFileFunc(); + Constant *EmitFunction = getEmitFunctionFunc(); + Constant *EmitArcs = getEmitArcsFunc(); + Constant *EndFile = getEndFileFunc(); + + for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(), + CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) { + DICompileUnit compile_unit(*CUI); + std::string FilenameGcda = replaceStem(compile_unit.getFilename(), "gcda"); + Builder.CreateCall(StartFile, + Builder.CreateGlobalStringPtr(FilenameGcda)); + for (SmallVector<std::pair<GlobalVariable *, uint32_t>, 8>::iterator + I = 
CountersByIdent.begin(), E = CountersByIdent.end(); + I != E; ++I) { + Builder.CreateCall(EmitFunction, ConstantInt::get(Type::getInt32Ty(*Ctx), + I->second)); + GlobalVariable *GV = I->first; + unsigned Arcs = + cast<ArrayType>(GV->getType()->getElementType())->getNumElements(); + Builder.CreateCall2(EmitArcs, + ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs), + Builder.CreateConstGEP2_64(GV, 0, 0)); + } + Builder.CreateCall(EndFile); + } + Builder.CreateRetVoid(); + + InsertProfilingShutdownCall(WriteoutF, M); +} diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 96ed4fa..71adc1e 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -23,6 +23,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeEdgeProfilerPass(Registry); initializeOptimalEdgeProfilerPass(Registry); initializePathProfilerPass(Registry); + initializeGCOVProfilerPass(Registry); } /// LLVMInitializeInstrumentation - C binding for diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 829da6b..f76c77e 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This module privides means for calculating a maximum spanning tree for a +// This module provides means for calculating a maximum spanning tree for a // given set of weighted edges. The type parameter T is the type of a node. // //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index c85a1a9..e09f882 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-optimal-edge-profiling" #include "ProfilingUtils.h" +#include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" @@ -26,7 +27,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "MaximumSpanningTree.h" -#include <set> using namespace llvm; STATISTIC(NumEdgesInserted, "The # of edges inserted."); @@ -120,14 +120,14 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { NumEdgesInserted = 0; std::vector<Constant*> Initializer(NumEdges); - Constant* Zero = ConstantInt::get(Int32, 0); - Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); + Constant *Zero = ConstantInt::get(Int32, 0); + Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); // Instrument all of the edges not in MST... unsigned i = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n"); // Calculate a Maximum Spanning Tree with the edge weights determined by // ProfileEstimator. 
ProfileEstimator also assign weights to the virtual @@ -139,17 +139,17 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { ProfileInfo::EdgeWeights ECs = getAnalysis<ProfileInfo>(*F).getEdgeWeights(F); std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end()); - MaximumSpanningTree<BasicBlock> MST (EdgeVector); - std::stable_sort(MST.begin(),MST.end()); + MaximumSpanningTree<BasicBlock> MST(EdgeVector); + std::stable_sort(MST.begin(), MST.end()); // Check if (0,entry) not in the MST. If not, instrument edge // (IncrementCounterInBlock()) and set the counter initially to zero, if // the edge is in the MST the counter is initialised to -1. BasicBlock *entry = &(F->getEntryBlock()); - ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry); + ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry); if (!std::binary_search(MST.begin(), MST.end(), edge)) { - printEdgeCounter(edge,entry,i); + printEdgeCounter(edge, entry, i); IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else{ @@ -170,9 +170,9 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // has no successors, the virtual edge (BB,0) is processed. TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 0) { - ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0); + ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0); if (!std::binary_search(MST.begin(), MST.end(), edge)) { - printEdgeCounter(edge,BB,i); + printEdgeCounter(edge, BB, i); IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else{ @@ -195,11 +195,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // otherwise insert it in the successor block. if (TI->getNumSuccessors() == 1) { // Insert counter at the start of the block - printEdgeCounter(edge,BB,i); + printEdgeCounter(edge, BB, i); IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; } else { // Insert counter at the start of the block - printEdgeCounter(edge,Succ,i); + printEdgeCounter(edge, Succ, i); IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted; } Initializer[i++] = (Zero); @@ -212,9 +212,9 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // Check if the number of edges counted at first was the number of edges we // considered for instrumentation. - assert(i==NumEdges && "the number of edges in counting array is wrong"); + assert(i == NumEdges && "the number of edges in counting array is wrong"); - // Assing the now completely defined initialiser to the array. + // Assign the now completely defined initialiser to the array. 
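Only edges outside the maximum spanning tree receive counters; the counts on tree edges can be recovered afterwards because flow is conserved at every block (executions entering equal executions leaving). A toy standalone illustration of that recovery for a diamond CFG, with made-up counts:

#include <cassert>
int main() {
  // entry -> {then, else} -> exit, run 10 times, 'then' taken 4 times.
  // Suppose only entry->then and else->exit were instrumented.
  long entry_then = 4, else_exit = 6;
  long entry_else = else_exit;                // sole edge into 'else'
  long then_exit  = entry_then;               // sole edge out of 'then'
  long total_runs = entry_then + entry_else;  // conservation at entry
  assert(total_runs == 10 && then_exit + else_exit == 10);
  return 0;
}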
Constant *init = ConstantArray::get(ATy, Initializer); Counters->setInitializer(init); diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index 6449b39..6b3f12d 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -63,7 +63,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Instrumentation.h" -#include <map> #include <vector> #define HASH_THRESHHOLD 100000 @@ -259,7 +258,7 @@ private: }; // --------------------------------------------------------------------------- -// PathProfiler is a module pass which intruments path profiling instructions +// PathProfiler is a module pass which instruments path profiling instructions // --------------------------------------------------------------------------- class PathProfiler : public ModulePass { private: @@ -389,6 +388,9 @@ namespace llvm { // BallLarusEdge << operator overloading raw_ostream& operator<<(raw_ostream& os, + const BLInstrumentationEdge& edge) + LLVM_ATTRIBUTE_USED; + raw_ostream& operator<<(raw_ostream& os, const BLInstrumentationEdge& edge) { os << "[" << edge.getSource()->getName() << " -> " << edge.getTarget()->getName() << "] init: " @@ -929,14 +931,16 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value* void PathProfiler::preparePHI(BLInstrumentationNode* node) { BasicBlock* block = node->getBlock(); BasicBlock::iterator insertPoint = block->getFirstNonPHI(); - PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), "pathNumber", + pred_iterator PB = pred_begin(node->getBlock()), + PE = pred_end(node->getBlock()); + PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), + std::distance(PB, PE), "pathNumber", insertPoint ); node->setPathPHI(phi); node->setStartingPathNumber(phi); node->setEndingPathNumber(phi); - for(pred_iterator predIt = pred_begin(node->getBlock()), - end = pred_end(node->getBlock()); predIt != end; predIt++) { + for(pred_iterator predIt = PB; predIt != PE; predIt++) { BasicBlock* pred = (*predIt); if(pred != NULL) diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index b57bbf6..7435bc3 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -110,7 +110,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, GlobalValue *CounterArray, bool beginning) { // Insert the increment after any alloca or PHI instructions... BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() : - BB->getTerminator(); + BB->getTerminator(); while (isa<AllocaInst>(InsertPos)) ++InsertPos; @@ -121,8 +121,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum); Constant *ElementPtr = - ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], - Indices.size()); + ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size()); // Load, increment and store the value back. 
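The increment built here (a constant GEP to the counter slot, then an ordinary load, add and store) amounts to the following in C terms; note that it is not an atomic update:

#include <cassert>
#include <cstdint>
int main() {
  int32_t counters[4] = {0, 0, 0, 0};
  int32_t *slot = &counters[2];  // ConstantExpr GEP to element CounterNum
  int32_t old = *slot;           // LoadInst
  *slot = old + 1;               // Add, then StoreInst
  assert(counters[2] == 1);
  return 0;
}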
Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos); @@ -131,3 +130,41 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, "NewFuncCounter", InsertPos); new StoreInst(NewVal, ElementPtr, InsertPos); } + +void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) { + // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those + // types. + const Type *GlobalDtorElems[2] = { + Type::getInt32Ty(Mod->getContext()), + FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo() + }; + const StructType *GlobalDtorElemTy = + StructType::get(Mod->getContext(), GlobalDtorElems, false); + + // Construct the new element we'll be adding. + Constant *Elem[2] = { + ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535), + ConstantExpr::getBitCast(Callee, GlobalDtorElems[1]) + }; + + // If llvm.global_dtors exists, make a copy of the things in its list and + // delete it, to replace it with one that has a larger array type. + std::vector<Constant *> dtors; + if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) { + if (ConstantArray *InitList = + dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) { + for (unsigned i = 0, e = InitList->getType()->getNumElements(); + i != e; ++i) + dtors.push_back(cast<Constant>(InitList->getOperand(i))); + } + GlobalDtors->eraseFromParent(); + } + + // Build up llvm.global_dtors with our new item in it. + GlobalVariable *GlobalDtors = new GlobalVariable( + *Mod, ArrayType::get(GlobalDtorElemTy, 1), false, + GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors"); + dtors.push_back(ConstantStruct::get(Mod->getContext(), Elem, 2, false)); + GlobalDtors->setInitializer(ConstantArray::get( + cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors)); +} diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.h b/lib/Transforms/Instrumentation/ProfilingUtils.h index a76e357..09b2217 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.h +++ b/lib/Transforms/Instrumentation/ProfilingUtils.h @@ -18,9 +18,10 @@ #define PROFILINGUTILS_H namespace llvm { + class BasicBlock; class Function; class GlobalValue; - class BasicBlock; + class Module; class PointerType; void InsertProfilingInitCall(Function *MainFn, const char *FnName, @@ -29,6 +30,7 @@ namespace llvm { void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, GlobalValue *CounterArray, bool beginning = true); + void InsertProfilingShutdownCall(Function *Callee, Module *Mod); } #endif diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 106fb8f..fcf914f 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -7,7 +7,6 @@ add_llvm_library(LLVMScalarOpts DCE.cpp DeadStoreElimination.cpp EarlyCSE.cpp - GEPSplitter.cpp GVN.cpp IndVarSimplify.cpp JumpThreading.cpp @@ -27,7 +26,6 @@ add_llvm_library(LLVMScalarOpts Scalar.cpp ScalarReplAggregates.cpp SimplifyCFGPass.cpp - SimplifyHalfPowrLibCalls.cpp SimplifyLibCalls.cpp Sink.cpp TailDuplication.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9536939..0184390 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -47,21 +47,21 @@ using namespace llvm; using namespace llvm::PatternMatch; STATISTIC(NumBlocksElim, "Number of blocks eliminated"); -STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); -STATISTIC(NumGEPsElim, "Number of GEPs converted 
to casts"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " "sunken Cmps"); STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " "of sunken Casts"); STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); -STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); -STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumRetsDup, "Number of return instructions duplicated"); -static cl::opt<bool> -CriticalEdgeSplit("cgp-critical-edge-splitting", - cl::desc("Split critical edges during codegen prepare"), - cl::init(false), cl::Hidden); +static cl::opt<bool> DisableBranchOpts( + "disable-cgp-branch-opts", cl::Hidden, cl::init(false), + cl::desc("Disable branch optimizations in CodeGenPrepare")); namespace { class CodeGenPrepare : public FunctionPass { @@ -76,15 +76,15 @@ namespace { /// update it. BasicBlock::iterator CurInstIterator; - /// BackEdges - Keep a set of all the loop back edges. - /// - SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges; - - // Keeps track of non-local addresses that have been sunk into a block. This - // allows us to avoid inserting duplicate code for blocks with multiple - // load/stores of the same address. + /// Keeps track of non-local addresses that have been sunk into a block. + /// This allows us to avoid inserting duplicate code for blocks with + /// multiple load/stores of the same address. DenseMap<Value*, Value*> SunkAddrs; + /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to + /// be updated. + bool ModifiedDT; + public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) @@ -98,10 +98,6 @@ namespace { AU.addPreserved<ProfileInfo>(); } - virtual void releaseMemory() { - BackEdges.clear(); - } - private: bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; @@ -113,7 +109,7 @@ namespace { bool OptimizeCallInst(CallInst *CI); bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); - void findLoopBackEdges(const Function &F); + bool DupRetToEnableTailCallOpts(ReturnInst *RI); }; } @@ -125,40 +121,42 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); } -/// findLoopBackEdges - Do a DFS walk to find loop back edges. -/// -void CodeGenPrepare::findLoopBackEdges(const Function &F) { - SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; - FindFunctionBackedges(F, Edges); - - BackEdges.insert(Edges.begin(), Edges.end()); -} - - bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; + ModifiedDT = false; DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); + // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); - // Now find loop back edges, but only if they are being used to decide which - // critical edges to split. 
- if (CriticalEdgeSplit) - findLoopBackEdges(F); - bool MadeChange = true; while (MadeChange) { MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; MadeChange |= OptimizeBlock(*BB); + } EverMadeChange |= MadeChange; } SunkAddrs.clear(); + if (!DisableBranchOpts) { + MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + MadeChange |= ConstantFoldTerminator(BB); + + if (MadeChange) + ModifiedDT = true; + EverMadeChange |= MadeChange; + } + + if (ModifiedDT && DT) + DT->DT->recalculate(F); + return EverMadeChange; } @@ -333,7 +331,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (DT) { + if (DT && !ModifiedDT) { BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); @@ -350,110 +348,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } -/// FindReusablePredBB - Check all of the predecessors of the block DestPHI -/// lives in to see if there is a block that we can reuse as a critical edge -/// from TIBB. -static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { - BasicBlock *Dest = DestPHI->getParent(); - - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector<Value*, 32> TIPHIValues; - - /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. - unsigned TIBBEntryNo = 0; - - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. - for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { - BasicBlock *Pred = DestPHI->getIncomingBlock(pi); - // To be usable, the pred has to end with an uncond branch to the dest. - BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional()) - continue; - // Must be empty other than the branch and debug info. - BasicBlock::iterator I = Pred->begin(); - while (isa<DbgInfoIntrinsic>(I)) - I++; - if (&*I != PredBr) - continue; - // Cannot be the entry block; its label does not get emitted. - if (Pred == &Dest->getParent()->getEntryBlock()) - continue; - - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same effect as going to Dest in - // terms of PHI values. - PHINode *PN; - unsigned PHINo = 0; - unsigned PredEntryNo = pi; - - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) { - if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) - TIBBEntryNo = PN->getBasicBlockIndex(TIBB); - TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); - } - - // If the PHI entry doesn't work, we can't use this pred. - if (PN->getIncomingBlock(PredEntryNo) != Pred) - PredEntryNo = PN->getBasicBlockIndex(Pred); - - if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { - FoundMatch = false; - break; - } - } - - // If we found a workable predecessor, change TI to branch to Succ. 
- if (FoundMatch) - return Pred; - } - return 0; -} - - -/// SplitEdgeNicely - Split the critical edge from TI to its specified -/// successor if it will improve codegen. We only do this if the successor has -/// phi nodes (otherwise critical edges are ok). If there is already another -/// predecessor of the succ that is empty (and thus has no phi nodes), use it -/// instead of introducing a new block. -static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, - SmallSet<std::pair<const BasicBlock*, - const BasicBlock*>, 8> &BackEdges, - Pass *P) { - BasicBlock *TIBB = TI->getParent(); - BasicBlock *Dest = TI->getSuccessor(SuccNum); - assert(isa<PHINode>(Dest->begin()) && - "This should only be called if Dest has a PHI!"); - PHINode *DestPHI = cast<PHINode>(Dest->begin()); - - // Do not split edges to EH landing pads. - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) - if (Invoke->getSuccessor(1) == Dest) - return; - - // As a hack, never split backedges of loops. Even though the copy for any - // PHIs inserted on the backedge would be dead for exits from the loop, we - // assume that the cost of *splitting* the backedge would be too high. - if (BackEdges.count(std::make_pair(TIBB, Dest))) - return; - - if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) { - ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PFI) - PFI->splitEdge(TIBB, Dest, ReuseBB); - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, ReuseBB); - return; - } - - SplitCriticalEdge(TI, SuccNum, P, true); -} - - /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), /// sink it into user blocks to reduce the number of virtual @@ -640,7 +534,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // happens. WeakVH IterHandle(CurInstIterator); - ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT); + ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, + ModifiedDT ? 0 : DT); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -666,6 +561,129 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { return Simplifier.fold(CI, TD); } +/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return +/// instructions to the predecessor to enable tail call optimizations. The +/// case it is currently looking for is: +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// br label %return +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// br label %return +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// br label %return +/// return: +/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +/// ret i32 %retval +/// +/// => +/// +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// ret i32 %tmp0 +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// ret i32 %tmp1 +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// ret i32 %tmp2 +/// +bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { + if (!TLI) + return false; + + Value *V = RI->getReturnValue(); + PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL; + if (V && !PN) + return false; + + BasicBlock *BB = RI->getParent(); + if (PN && PN->getParent() != BB) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + // See llvm::isInTailCallPosition(). 
+ const Function *F = BB->getParent(); + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Make sure there are no instructions between the PHI and return, or that the + // return is the first instruction in the block. + if (PN) { + BasicBlock::iterator BI = BB->begin(); + do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI != RI) + return false; + } else { + BasicBlock::iterator BI = BB->begin(); + while (isa<DbgInfoIntrinsic>(BI)) ++BI; + if (&*BI != RI) + return false; + } + + /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail + /// call. + SmallVector<CallInst*, 4> TailCalls; + if (PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); + // Make sure the phi value is indeed produced by the tail call. + if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } else { + SmallPtrSet<BasicBlock*, 4> VisitedBBs; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (!VisitedBBs.insert(*PI)) + continue; + + BasicBlock::InstListType &InstList = (*PI)->getInstList(); + BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); + BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); + do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); + if (RI == RE) + continue; + + CallInst *CI = dyn_cast<CallInst>(&*RI); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } + + bool Changed = false; + for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { + CallInst *CI = TailCalls[i]; + CallSite CS(CI); + + // Conservatively require the attributes of the call to match those of the + // return. Ignore noalias because it doesn't affect the call sequence. + unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + continue; + + // Make sure the call instruction is followed by an unconditional branch to + // the return block. + BasicBlock *CallBB = CI->getParent(); + BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); + if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) + continue; + + // Duplicate the return into CallBB. + (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + ModifiedDT = Changed = true; + ++NumRetsDup; + } + + // If we eliminated all predecessors of the block, delete the block now. + if (Changed && pred_begin(BB) == pred_end(BB)) + BB->eraseFromParent(); + + return Changed; +} + //===----------------------------------------------------------------------===// // Memory Optimization //===----------------------------------------------------------------------===// @@ -701,7 +719,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. 
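The compatibility test just above, (CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias, is a plain bitmask check: XOR leaves a bit set for every return attribute present on exactly one side, and masking out noalias makes that one mismatch harmless. A self-contained sketch of the idiom with made-up bit values (the real constants live in llvm/Attributes.h):

#include <cassert>

// Illustrative bit values only; the XOR/mask shape is what matters.
enum : unsigned { ZExt = 1u << 0, SExt = 1u << 1, NoAlias = 1u << 6 };

static bool retAttrsCompatible(unsigned CalleeRetAttr, unsigned CallerRetAttr) {
  // Any differing attribute bit other than NoAlias rejects the duplication.
  return ((CalleeRetAttr ^ CallerRetAttr) & ~NoAlias) == 0;
}

int main() {
  assert(retAttrsCompatible(NoAlias, 0));           // only noalias differs: OK
  assert(!retAttrsCompatible(ZExt, 0));             // zeroext mismatch: reject
  assert(retAttrsCompatible(SExt | NoAlias, SExt)); // sext matches: OK
  return 0;
}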
Value *Consensus = 0; - unsigned NumUses = 0; + unsigned NumUsesConsensus = 0; + bool IsNumUsesConsensusValid = false; SmallVector<Instruction*, 16> AddrModeInsts; ExtAddrMode AddrMode; while (!worklist.empty()) { @@ -728,16 +747,31 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(V, AccessTy,MemoryInst, NewAddrModeInsts, *TLI); - - // Ensure that the obtained addressing mode is equivalent to that obtained - // for all other roots of the PHI traversal. Also, when choosing one - // such root as representative, select the one with the most uses in order - // to keep the cost modeling heuristics in AddressingModeMatcher applicable. - if (!Consensus || NewAddrMode == AddrMode) { - if (V->getNumUses() > NumUses) { + + // This check is broken into two cases with very similar code to avoid using + // getNumUses() as much as possible. Some values have a lot of uses, so + // calling getNumUses() unconditionally caused a significant compile-time + // regression. + if (!Consensus) { + Consensus = V; + AddrMode = NewAddrMode; + AddrModeInsts = NewAddrModeInsts; + continue; + } else if (NewAddrMode == AddrMode) { + if (!IsNumUsesConsensusValid) { + NumUsesConsensus = Consensus->getNumUses(); + IsNumUsesConsensusValid = true; + } + + // Ensure that the obtained addressing mode is equivalent to that obtained + // for all other roots of the PHI traversal. Also, when choosing one + // such root as representative, select the one with the most uses in order + // to keep the cost modeling heuristics in AddressingModeMatcher + // applicable. + unsigned NumUses = V->getNumUses(); + if (NumUses > NumUsesConsensus) { Consensus = V; - NumUses = V->getNumUses(); - AddrMode = NewAddrMode; + NumUsesConsensus = NumUses; AddrModeInsts = NewAddrModeInsts; } continue; @@ -855,11 +889,26 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, MemoryInst->replaceUsesOfWith(Repl, SunkAddr); + // If we have no uses, recursively delete the value and all dead instructions + // using it. if (Repl->use_empty()) { + // This can cause recursive deletion, which can invalidate our iterator. + // Use a WeakVH to hold onto it in case this happens. + WeakVH IterHandle(CurInstIterator); + BasicBlock *BB = CurInstIterator->getParent(); + RecursivelyDeleteTriviallyDeadInstructions(Repl); - // This address is now available for reassignment, so erase the table entry; - // we don't want to match some completely different instruction. - SunkAddrs[Addr] = 0; + + if (IterHandle != CurInstIterator) { + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } else { + // This address is now available for reassignment, so erase the table + // entry; we don't want to match some completely different instruction. + SunkAddrs[Addr] = 0; + } } ++NumMemoryInsts; return true; @@ -1073,6 +1122,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { if (CallInst *CI = dyn_cast<CallInst>(I)) return OptimizeCallInst(CI); + if (ReturnInst *RI = dyn_cast<ReturnInst>(I)) + return DupRetToEnableTailCallOpts(RI); + return false; } @@ -1080,21 +1132,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { - bool MadeChange = false; - - // Split all critical edges where the dest block has a PHI. 
- if (CriticalEdgeSplit) { - TerminatorInst *BBTI = BB.getTerminator(); - if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { - BasicBlock *SuccBB = BBTI->getSuccessor(i); - if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, BackEdges, this); - } - } - } - SunkAddrs.clear(); + bool MadeChange = false; CurInstIterator = BB.begin(); for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; ) diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index be12973..e275268 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "correlated-value-propagation" #include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index dbb68f3..8dbcc23 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -23,7 +23,6 @@ #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" -#include <set> using namespace llvm; STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 867a06a..53e4640 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -340,24 +340,35 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - int64_t Off1 = 0, Off2 = 0; - const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD); - const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD); + int64_t EarlierOff = 0, LaterOff = 0; + const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); + const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); // If the base pointers still differ, we have two completely different stores. if (BP1 != BP2) return false; - - // Otherwise, we might have a situation like: - // store i16 -> P + 1 Byte - // store i32 -> P - // In this case, we see if the later store completely overlaps all bytes - // stored by the previous store. - if (Off1 < Off2 || // Earlier starts before Later. - Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later. - return false; - // Otherwise, we have complete overlap. - return true; + + // The later store completely overlaps the earlier store if: + // + // 1. Both start at the same offset and the later one's size is greater than + // or equal to the earlier one's, or + // + // |--earlier--| + // |-- later --| + // + // 2. The earlier store has an offset greater than the later offset, but which + // still lies completely within the later store. + // + // |--earlier--| + // |----- later ------| + // + // We have to be careful here as *Off is signed while *.Size is unsigned. + if (EarlierOff >= LaterOff && + uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) + return true; + + // Otherwise, they don't completely overlap. + return false; } /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. 
a noop copy of a @@ -474,7 +485,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // away the store and we bail out. However, if we depend on on something // that overwrites the memory location we *can* potentially optimize it. // - // Find out what memory location the dependant instruction stores. + // Find out what memory location the dependent instruction stores. Instruction *DepWrite = InstDep.getInst(); AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA); // If we didn't get a useful location, or if it isn't a size, bail out. @@ -631,28 +642,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (AA->doesNotAccessMemory(CS)) continue; - unsigned NumModRef = 0, NumOther = 0; - // If the call might load from any of our allocas, then any store above // the call is live. SmallVector<Value*, 8> LiveAllocas; for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { - // If we detect that our AA is imprecise, it's not worth it to scan the - // rest of the DeadPointers set. Just assume that the AA will return - // ModRef for everything, and go ahead and bail out. - if (NumModRef >= 16 && NumOther == 0) - return MadeChange; - // See if the call site touches it. AliasAnalysis::ModRefResult A = AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA)); - if (A == AliasAnalysis::ModRef) - ++NumModRef; - else - ++NumOther; - if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) LiveAllocas.push_back(*I); } diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp deleted file mode 100644 index 4c3d188..0000000 --- a/lib/Transforms/Scalar/GEPSplitter.cpp +++ /dev/null @@ -1,83 +0,0 @@ -//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This function breaks GEPs with more than 2 non-zero operands into smaller -// GEPs each with no more than 2 non-zero operands. This exposes redundancy -// between GEPs with common initial operand sequences. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "split-geps" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -using namespace llvm; - -namespace { - class GEPSplitter : public FunctionPass { - virtual bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - public: - static char ID; // Pass identification, replacement for typeid - explicit GEPSplitter() : FunctionPass(ID) { - initializeGEPSplitterPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char GEPSplitter::ID = 0; -INITIALIZE_PASS(GEPSplitter, "split-geps", - "split complex GEPs into simple GEPs", false, false) - -FunctionPass *llvm::createGEPSplitterPass() { - return new GEPSplitter(); -} - -bool GEPSplitter::runOnFunction(Function &F) { - bool Changed = false; - - // Visit each GEP instruction. - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; ) - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(II++)) { - unsigned NumOps = GEP->getNumOperands(); - // Ignore GEPs which are already simple. 
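Back in the DeadStoreElimination change above, the rewritten isCompleteOverwrite test compares a signed offset difference against unsigned sizes, hence the explicit uint64_t cast. A standalone sketch of that interval check with two worked cases (names are illustrative, not LLVM code):

#include <cassert>
#include <cstdint>

static bool laterCompletelyOverwrites(int64_t EarlierOff, uint64_t EarlierSize,
                                      int64_t LaterOff, uint64_t LaterSize) {
  // The earlier store must start no earlier than the later one and end no
  // later than it; the subtraction is only safe once EarlierOff >= LaterOff.
  return EarlierOff >= LaterOff &&
         uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize;
}

int main() {
  // store i16 -> P+1 (2 bytes), then store i32 -> P (4 bytes): earlier is dead.
  assert(laterCompletelyOverwrites(/*EarlierOff=*/1, /*EarlierSize=*/2,
                                   /*LaterOff=*/0, /*LaterSize=*/4));
  // store i32 -> P, then store i16 -> P: bytes 2..3 of the earlier store survive.
  assert(!laterCompletelyOverwrites(0, 4, 0, 2));
  return 0;
}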
- if (NumOps <= 2) - continue; - bool FirstIndexIsZero = isa<ConstantInt>(GEP->getOperand(1)) && - cast<ConstantInt>(GEP->getOperand(1))->isZero(); - if (NumOps == 3 && FirstIndexIsZero) - continue; - // The first index is special and gets expanded with a 2-operand GEP - // (unless it's zero, in which case we can skip this). - Value *NewGEP = FirstIndexIsZero ? - GEP->getOperand(0) : - GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1), - "tmp", GEP); - // All remaining indices get expanded with a 3-operand GEP with zero - // as the second operand. - Value *Idxs[2]; - Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0); - for (unsigned i = 2; i != NumOps; ++i) { - Idxs[1] = GEP->getOperand(i); - NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP); - } - GEP->replaceAllUsesWith(NewGEP); - GEP->eraseFromParent(); - Changed = true; - } - - return Changed; -} - -void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a0123f5..efecb97 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -63,50 +63,48 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); namespace { struct Expression { uint32_t opcode; - const Type* type; + const Type *type; SmallVector<uint32_t, 4> varargs; - Expression() { } - Expression(uint32_t o) : opcode(o) { } + Expression(uint32_t o = ~2U) : opcode(o) { } bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; - else if (opcode == ~0U || opcode == ~1U) + if (opcode == ~0U || opcode == ~1U) return true; - else if (type != other.type) + if (type != other.type) return false; - else if (varargs != other.varargs) + if (varargs != other.varargs) return false; return true; } }; class ValueTable { - private: - DenseMap<Value*, uint32_t> valueNumbering; - DenseMap<Expression, uint32_t> expressionNumbering; - AliasAnalysis* AA; - MemoryDependenceAnalysis* MD; - DominatorTree* DT; - - uint32_t nextValueNumber; - - Expression create_expression(Instruction* I); - uint32_t lookup_or_add_call(CallInst* C); - public: - ValueTable() : nextValueNumber(1) { } - uint32_t lookup_or_add(Value *V); - uint32_t lookup(Value *V) const; - void add(Value *V, uint32_t num); - void clear(); - void erase(Value *v); - void setAliasAnalysis(AliasAnalysis* A) { AA = A; } - AliasAnalysis *getAliasAnalysis() const { return AA; } - void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } - void setDomTree(DominatorTree* D) { DT = D; } - uint32_t getNextUnusedValueNumber() { return nextValueNumber; } - void verifyRemoved(const Value *) const; + DenseMap<Value*, uint32_t> valueNumbering; + DenseMap<Expression, uint32_t> expressionNumbering; + AliasAnalysis *AA; + MemoryDependenceAnalysis *MD; + DominatorTree *DT; + + uint32_t nextValueNumber; + + Expression create_expression(Instruction* I); + uint32_t lookup_or_add_call(CallInst* C); + public: + ValueTable() : nextValueNumber(1) { } + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + void add(Value *V, uint32_t num); + void clear(); + void erase(Value *v); + void setAliasAnalysis(AliasAnalysis* A) { AA = A; } + AliasAnalysis *getAliasAnalysis() const { return AA; } + void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } + void setDomTree(DominatorTree* D) { DT = D; } + uint32_t getNextUnusedValueNumber() { return nextValueNumber; } + void verifyRemoved(const Value *) const; }; } @@ -364,14 +362,14 @@ 
uint32_t ValueTable::lookup(Value *V) const { return VI->second; } -/// clear - Remove all entries from the ValueTable +/// clear - Remove all entries from the ValueTable. void ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); nextValueNumber = 1; } -/// erase - Remove a value from the value numbering +/// erase - Remove a value from the value numbering. void ValueTable::erase(Value *V) { valueNumbering.erase(V); } @@ -392,20 +390,11 @@ void ValueTable::verifyRemoved(const Value *V) const { namespace { class GVN : public FunctionPass { - bool runOnFunction(Function &F); - public: - static char ID; // Pass identification, replacement for typeid - explicit GVN(bool noloads = false) - : FunctionPass(ID), NoLoads(noloads), MD(0) { - initializeGVNPass(*PassRegistry::getPassRegistry()); - } - - private: bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; - const TargetData* TD; - + const TargetData *TD; + ValueTable VN; /// LeaderTable - A mapping from value numbers to lists of Value*'s that @@ -418,17 +407,39 @@ namespace { DenseMap<uint32_t, LeaderTableEntry> LeaderTable; BumpPtrAllocator TableAllocator; + SmallVector<Instruction*, 8> InstrsToErase; + public: + static char ID; // Pass identification, replacement for typeid + explicit GVN(bool noloads = false) + : FunctionPass(ID), NoLoads(noloads), MD(0) { + initializeGVNPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + + /// markInstructionForDeletion - This removes the specified instruction from + /// our various maps and marks it for deletion. + void markInstructionForDeletion(Instruction *I) { + VN.erase(I); + InstrsToErase.push_back(I); + } + + const TargetData *getTargetData() const { return TD; } + DominatorTree &getDominatorTree() const { return *DT; } + AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } + MemoryDependenceAnalysis &getMemDep() const { return *MD; } + private: /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for /// its value number. void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { - LeaderTableEntry& Curr = LeaderTable[N]; + LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { Curr.Val = V; Curr.BB = BB; return; } - LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>(); + LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>(); Node->Val = V; Node->BB = BB; Node->Next = Curr.Next; @@ -474,19 +485,17 @@ namespace { AU.addPreserved<DominatorTree>(); AU.addPreserved<AliasAnalysis>(); } + // Helper fuctions // FIXME: eliminate or document these better - bool processLoad(LoadInst* L, - SmallVectorImpl<Instruction*> &toErase); - bool processInstruction(Instruction *I, - SmallVectorImpl<Instruction*> &toErase); - bool processNonLocalLoad(LoadInst* L, - SmallVectorImpl<Instruction*> &toErase); + bool processLoad(LoadInst *L); + bool processInstruction(Instruction *I); + bool processNonLocalLoad(LoadInst *L); bool processBlock(BasicBlock *BB); - void dump(DenseMap<uint32_t, Value*>& d); + void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); - bool performPRE(Function& F); + bool performPRE(Function &F); Value *findLeader(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; @@ -629,17 +638,17 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) return 0; + // If this is already the right type, just return it. 
const Type *StoredValTy = StoredVal->getType(); uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); - uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); + uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { - if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) { - // Pointer to Pointer -> use bitcast. + // Pointer to Pointer -> use bitcast. + if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); - } // Convert source pointers to integers, which can be bitcast. if (StoredValTy->isPointerTy()) { @@ -796,6 +805,36 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, StorePtr, StoreSize, TD); } +/// AnalyzeLoadFromClobberingLoad - This function is called when we have a +/// memdep query of a load that ends up being clobbered by another load. See if +/// the other load can feed into the second load. +static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr, + LoadInst *DepLI, const TargetData &TD){ + // Cannot handle reading from store of first-class aggregate yet. + if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) + return -1; + + Value *DepPtr = DepLI->getPointerOperand(); + uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType()); + int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD); + if (R != -1) return R; + + // If we have a load/load clobber an DepLI can be widened to cover this load, + // then we should widen it! + int64_t LoadOffs = 0; + const Value *LoadBase = + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD); + if (Size == 0) return -1; + + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD); +} + + + static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, MemIntrinsic *MI, const TargetData &TD) { @@ -843,9 +882,9 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means -/// that the store *may* provide bits used by the load but we can't be sure -/// because the pointers don't mustalias. Check this case to see if there is -/// anything more we can do before we give up. +/// that the store provides bits used by the load but we the pointers don't +/// mustalias. Check this case to see if there is anything more we can do +/// before we give up. static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, const Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ @@ -881,6 +920,69 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } +/// GetStoreValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering load. This means +/// that the load *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. 
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, + const Type *LoadTy, Instruction *InsertPt, + GVN &gvn) { + const TargetData &TD = *gvn.getTargetData(); + // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to + // widen SrcVal out to a larger load. + unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType()); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + if (Offset+LoadSize > SrcValSize) { + assert(!SrcVal->isVolatile() && "Cannot widen volatile load!"); + assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load"); + // If we have a load/load clobber an DepLI can be widened to cover this + // load, then we should widen it to the next power of 2 size big enough! + unsigned NewLoadSize = Offset+LoadSize; + if (!isPowerOf2_32(NewLoadSize)) + NewLoadSize = NextPowerOf2(NewLoadSize); + + Value *PtrVal = SrcVal->getPointerOperand(); + + // Insert the new load after the old load. This ensures that subsequent + // memdep queries will find the new load. We can't easily remove the old + // load completely because it is already in the value numbering table. + IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); + const Type *DestPTy = + IntegerType::get(LoadTy->getContext(), NewLoadSize*8); + DestPTy = PointerType::get(DestPTy, + cast<PointerType>(PtrVal->getType())->getAddressSpace()); + + PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); + LoadInst *NewLoad = Builder.CreateLoad(PtrVal); + NewLoad->takeName(SrcVal); + NewLoad->setAlignment(SrcVal->getAlignment()); + + DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); + DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); + + // Replace uses of the original load with the wider load. On a big endian + // system, we need to shift down to get the relevant bits. + Value *RV = NewLoad; + if (TD.isBigEndian()) + RV = Builder.CreateLShr(RV, + NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); + RV = Builder.CreateTrunc(RV, SrcVal->getType()); + SrcVal->replaceAllUsesWith(RV); + + // We would like to use gvn.markInstructionForDeletion here, but we can't + // because the load is already memoized into the leader map table that GVN + // tracks. It is potentially possible to remove the load from the table, + // but then there all of the operations based on it would need to be + // rehashed. Just leave the dead load around. + gvn.getMemDep().removeInstruction(SrcVal); + SrcVal = NewLoad; + } + + return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD); +} + + /// GetMemInstValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering mem intrinsic. static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, @@ -943,11 +1045,12 @@ struct AvailableValueInBlock { BasicBlock *BB; enum ValType { SimpleVal, // A simple offsetted value that is accessed. + LoadVal, // A value produced by a load. MemIntrin // A memory intrinsic which is loaded from. }; /// V - The value that is live out of the block. - PointerIntPair<Value *, 1, ValType> Val; + PointerIntPair<Value *, 2, ValType> Val; /// Offset - The byte offset in Val that is interesting for the load query. 
unsigned Offset; @@ -972,37 +1075,69 @@ struct AvailableValueInBlock { return Res; } + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(LI); + Res.Val.setInt(LoadVal); + Res.Offset = Offset; + return Res; + } + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } + bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); return Val.getPointer(); } + LoadInst *getCoercedLoadValue() const { + assert(isCoercedLoadValue() && "Wrong accessor"); + return cast<LoadInst>(Val.getPointer()); + } + MemIntrinsic *getMemIntrinValue() const { - assert(!isSimpleValue() && "Wrong accessor"); + assert(isMemIntrinValue() && "Wrong accessor"); return cast<MemIntrinsic>(Val.getPointer()); } /// MaterializeAdjustedValue - Emit code into this block to adjust the value /// defined here to the specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(const Type *LoadTy, - const TargetData *TD) const { + Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const { Value *Res; if (isSimpleValue()) { Res = getSimpleValue(); if (Res->getType() != LoadTy) { + const TargetData *TD = gvn.getTargetData(); assert(TD && "Need target data to handle type mismatch case"); Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); } + } else if (isCoercedLoadValue()) { + LoadInst *Load = getCoercedLoadValue(); + if (Load->getType() == LoadTy && Offset == 0) { + Res = Load; + } else { + Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), + gvn); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " + << *getCoercedLoadValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } } else { + const TargetData *TD = gvn.getTargetData(); + assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); } @@ -1010,21 +1145,20 @@ struct AvailableValueInBlock { } }; -} +} // end anonymous namespace /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, - const TargetData *TD, - const DominatorTree &DT, - AliasAnalysis *AA) { + GVN &gvn) { // Check for the fully redundant, dominating load case. In this case, we can // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && - DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) - return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD); + gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, + LI->getParent())) + return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); // Otherwise, we have to construct SSA form. 
SmallVector<PHINode*, 8> NewPHIs; @@ -1040,14 +1174,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (SSAUpdate.HasValueForBlock(BB)) continue; - SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD)); + SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); } // Perform PHI construction. Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); // If new PHI nodes were created, notify alias analysis. - if (V->getType()->isPointerTy()) + if (V->getType()->isPointerTy()) { + AliasAnalysis *AA = gvn.getAliasAnalysis(); + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); @@ -1059,6 +1195,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) AA->addEscapingUse(P->getOperandUse(2*ii)); } + } return V; } @@ -1071,8 +1208,7 @@ static bool isLifetimeStart(const Instruction *Inst) { /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. -bool GVN::processNonLocalLoad(LoadInst *LI, - SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processNonLocalLoad(LoadInst *LI) { // Find the non-local dependencies of the load. SmallVector<NonLocalDepResult, 64> Deps; AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); @@ -1088,7 +1224,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { + if (Deps.size() == 1 && Deps[0].getResult().isClobber() && + Deps[0].getResult().getInst()->getParent() == LI->getParent()) { DEBUG( dbgs() << "GVN: non-local load "; WriteAsOperand(dbgs(), LI); @@ -1129,6 +1266,26 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI != LI && Address && TD) { + int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), + LI->getPointerOperand(), + DepLI, *TD); + + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, + Offset)); + continue; + } + } + } // If the clobbering value is a memset/memcpy/memmove, see if we can // forward a value on from it. @@ -1187,7 +1344,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, continue; } } - ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); continue; } @@ -1206,16 +1363,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumGVNLoad; return true; } @@ -1429,22 +1584,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } // Perform PHI construction. 
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumPRELoad; return true; } /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. -bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processLoad(LoadInst *L) { if (!MD) return false; @@ -1454,8 +1607,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // ... to a pointer that has been loaded from before... MemDepResult Dep = MD->getDependency(L); - // If the value isn't available, don't do anything! - if (Dep.isClobber()) { + // If we have a clobber and target data is around, see if this is a clobber + // that we can fix up through code synthesis. + if (Dep.isClobber() && TD) { // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1467,26 +1621,40 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // completely covers this load. This sort of thing can happen in bitfield // access code. Value *AvailVal = 0; - if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) - if (TD) { - int Offset = AnalyzeLoadFromClobberingStore(L->getType(), - L->getPointerOperand(), - DepSI, *TD); - if (Offset != -1) - AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, - L->getType(), L, *TD); - } + if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { + int Offset = AnalyzeLoadFromClobberingStore(L->getType(), + L->getPointerOperand(), + DepSI, *TD); + if (Offset != -1) + AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, + L->getType(), L, *TD); + } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI == L) + return false; + + int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), + L->getPointerOperand(), + DepLI, *TD); + if (Offset != -1) + AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); + } // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. 
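The new load/load clobber handling above (AnalyzeLoadFromClobberingLoad plus GetLoadValueForLoad) boils down to: if an earlier, wider load already covers the bytes a later narrow load wants, widen the earlier load if necessary and then shift and truncate the interesting bytes out of it. The extraction step, sketched standalone for the little-endian case (on big-endian the shift amount is taken from the other end, as the patch does with CreateLShr):

#include <cassert>
#include <cstdint>

// Hypothetical helper mirroring the lshr + trunc the pass emits.
static uint8_t extractByteLE(uint32_t Wide, unsigned ByteOffset) {
  return uint8_t(Wide >> (ByteOffset * 8));
}

int main() {
  // Memory at P holds the little-endian i32 0x44332211;
  // "load i8* (P+1)" must observe 0x22.
  assert(extractByteLE(0x44332211u, 1) == 0x22);
  return 0;
}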
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { - if (TD) { - int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), - L->getPointerOperand(), - DepMI, *TD); - if (Offset != -1) - AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD); - } + int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), + L->getPointerOperand(), + DepMI, *TD); + if (Offset != -1) + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD); } if (AvailVal) { @@ -1497,14 +1665,16 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(AvailVal); if (AvailVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(AvailVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } - + } + + // If the value isn't available, don't do anything! + if (Dep.isClobber()) { DEBUG( - // fast print dep, using operator<< on instruction would be too slow + // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; WriteAsOperand(dbgs(), L); Instruction *I = Dep.getInst(); @@ -1515,7 +1685,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // If it is defined in another block, try harder. if (Dep.isNonLocal()) - return processNonLocalLoad(L, toErase); + return processNonLocalLoad(L); Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { @@ -1542,8 +1712,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(StoredVal); if (StoredVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(StoredVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1556,7 +1725,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // (depending on its type). if (DepLI->getType() != L->getType()) { if (TD) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), + L, *TD); if (AvailableVal == 0) return false; @@ -1571,8 +1741,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(AvailableVal); if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1582,19 +1751,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // intervening stores, for example. if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. 
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1634,8 +1801,7 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets -bool GVN::processInstruction(Instruction *I, - SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processInstruction(Instruction *I) { // Ignore dbg info intrinsics. if (isa<DbgInfoIntrinsic>(I)) return false; @@ -1648,20 +1814,17 @@ bool GVN::processInstruction(Instruction *I, I->replaceAllUsesWith(V); if (MD && V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(I); - toErase.push_back(I); + markInstructionForDeletion(I); return true; } if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - bool Changed = processLoad(LI, toErase); - - if (!Changed) { - unsigned Num = VN.lookup_or_add(LI); - addToLeaderTable(Num, LI, LI->getParent()); - } + if (processLoad(LI)) + return true; - return Changed; + unsigned Num = VN.lookup_or_add(LI); + addToLeaderTable(Num, LI, LI->getParent()); + return false; } // For conditions branches, we can perform simple conditional propagation on @@ -1720,11 +1883,10 @@ bool GVN::processInstruction(Instruction *I, } // Remove it! - VN.erase(I); I->replaceAllUsesWith(repl); if (MD && repl->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); + markInstructionForDeletion(I); return true; } @@ -1781,35 +1943,36 @@ bool GVN::runOnFunction(Function& F) { bool GVN::processBlock(BasicBlock *BB) { - // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and - // incrementing BI before processing an instruction). - SmallVector<Instruction*, 8> toErase; + // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function + // (and incrementing BI before processing an instruction). + assert(InstrsToErase.empty() && + "We expect InstrsToErase to be empty across iterations"); bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - ChangedFunction |= processInstruction(BI, toErase); - if (toErase.empty()) { + ChangedFunction |= processInstruction(BI); + if (InstrsToErase.empty()) { ++BI; continue; } // If we need some instructions deleted, do it now. - NumGVNInstr += toErase.size(); + NumGVNInstr += InstrsToErase.size(); // Avoid iterator invalidation. bool AtStart = BI == BB->begin(); if (!AtStart) --BI; - for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), - E = toErase.end(); I != E; ++I) { + for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(), + E = InstrsToErase.end(); I != E; ++I) { DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); } - toErase.clear(); + InstrsToErase.clear(); if (AtStart) BI = BB->begin(); @@ -1944,11 +2107,11 @@ bool GVN::performPRE(Function &F) { addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. 
- PHINode* Phi = PHINode::Create(CurInst->getType(), + pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); + PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; Phi->addIncoming(predMap[P], P); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 0fb6798..09d569a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -73,6 +73,7 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; + SmallVector<WeakVH, 16> DeadInsts; bool Changed; public: @@ -98,6 +99,7 @@ namespace { } private: + bool isValidRewrite(Value *FromVal, Value *ToVal); void EliminateIVComparisons(); void EliminateIVRemainders(); @@ -134,6 +136,53 @@ Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); } +/// isValidRewrite - Return true if the SCEV expansion generated by the +/// rewriter can replace the original value. SCEV guarantees that it +/// produces the same value, but the way it is produced may be illegal IR. +/// Ideally, this function will only be called for verification. +bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { + // If an SCEV expression subsumed multiple pointers, its expansion could + // reassociate the GEP changing the base pointer. This is illegal because the + // final address produced by a GEP chain must be inbounds relative to its + // underlying object. Otherwise basic alias analysis, among other things, + // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid + // producing an expression involving multiple pointers. Until then, we must + // bail out here. + // + // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject + // because it understands lcssa phis while SCEV does not. + Value *FromPtr = FromVal; + Value *ToPtr = ToVal; + if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) { + FromPtr = GEP->getPointerOperand(); + } + if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) { + ToPtr = GEP->getPointerOperand(); + } + if (FromPtr != FromVal || ToPtr != ToVal) { + // Quickly check the common case + if (FromPtr == ToPtr) + return true; + + // SCEV may have rewritten an expression that produces the GEP's pointer + // operand. That's ok as long as the pointer operand has the same base + // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the + // base of a recurrence. This handles the case in which SCEV expansion + // converts a pointer type recurrence into a nonrecurrent pointer base + // indexed by an integer recurrence. + const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); + const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); + if (FromBase == ToBase) + return true; + + DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " + << *FromBase << " != " << *ToBase << "\n"); + + return false; + } + return true; +} + /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. 
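The PHINode::Create calls in this hunk (and the later ones in JumpThreading, HandleFloatingPointIV, and LSR's OptimizeShadowIV) gain an extra argument: the number of incoming values to reserve up front. The count is a capacity hint; addIncoming still fills the node and can grow past it. A short sketch of the new call shape, with BB, LoadTy, and AvailableVals as hypothetical stand-ins for the surrounding code:

    // Reserve one slot per predecessor before filling the PHI.
    pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
    PHINode *PN = PHINode::Create(LoadTy, std::distance(PB, PE),
                                  "val.phi", BB->begin());
    for (pred_iterator PI = PB; PI != PE; ++PI)
      PN->addIncoming(AvailableVals[*PI], *PI);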
This pass is able to rewrite the exit tests of any loop where the @@ -226,7 +275,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // update the branch to use the new comparison; in the common case this // will make old comparison dead. BI->setCondition(Cond); - RecursivelyDeleteTriviallyDeadInstructions(OrigCond); + DeadInsts.push_back(OrigCond); ++NumLFTR; Changed = true; @@ -304,14 +353,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { if (!SE->isLoopInvariant(ExitValue, L)) continue; - Changed = true; - ++NumReplaced; - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); + if (!isValidRewrite(Inst, ExitVal)) { + DeadInsts.push_back(ExitVal); + continue; + } + Changed = true; + ++NumReplaced; + PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. @@ -366,8 +419,6 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { } void IndVarSimplify::EliminateIVComparisons() { - SmallVector<WeakVH, 16> DeadInsts; - // Look for ICmp users. for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; @@ -399,18 +450,9 @@ void IndVarSimplify::EliminateIVComparisons() { DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); DeadInsts.push_back(ICmp); } - - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } void IndVarSimplify::EliminateIVRemainders() { - SmallVector<WeakVH, 16> DeadInsts; - // Look for SRem and URem users. for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; @@ -466,13 +508,6 @@ void IndVarSimplify::EliminateIVRemainders() { DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); DeadInsts.push_back(Rem); } - - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -491,6 +526,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); + DeadInsts.clear(); Changed = false; // If there are any floating-point recurrences, attempt to @@ -589,9 +625,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ExitingBlock, BI, Rewriter); } - // Rewrite IV-derived expressions. Clears the rewriter cache. + // Rewrite IV-derived expressions. RewriteIVExpressions(L, Rewriter); + // Clear the rewriter cache, because values that are in the rewriter's cache + // can be deleted in the loop below, causing the AssertingVH in the cache to + // trigger. + Rewriter.clear(); + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + // The Rewriter may not be used from this point on. 
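Note on the IndVarSimplify hunks above: the per-transform DeadInsts vectors are hoisted into a single pass member, and the cleanup runs once near the end of runOnLoop, after RewriteIVExpressions. WeakVH entries null themselves out if an instruction is deleted earlier, hence the dyn_cast_or_null, and the rewriter cache must be cleared first so its AssertingVHs do not fire. Sketch of the hoisted tail, names as in the patch:

    Rewriter.clear();   // drop cached values before deleting anything they track
    while (!DeadInsts.empty())
      if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
        RecursivelyDeleteTriviallyDeadInstructions(Inst);
    // The Rewriter may not be used from this point on.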
// Loop-invariant instructions in the preheader that aren't used in the @@ -632,7 +680,7 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { if (!isSafe(*I, L, SE)) return false; return true; } - + // A cast is safe if its operand is. if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) return isSafe(C->getOperand(), L, SE); @@ -651,8 +699,6 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { } void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { - SmallVector<WeakVH, 16> DeadInsts; - // Rewrite all induction variable expressions in terms of the canonical // induction variable. // @@ -705,6 +751,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // Now expand it into actual Instructions and patch it into place. Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + + if (!isValidRewrite(Op, NewVal)) { + DeadInsts.push_back(NewVal); + continue; + } // Inform ScalarEvolution that this value is changing. The change doesn't // affect its value, but it does potentially affect which use lists the // value will be on after the replacement, which affects ScalarEvolution's @@ -717,25 +770,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { NewVal->takeName(Op); User->replaceUsesOfWith(Op, NewVal); UI->setOperandValToReplace(NewVal); - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); + ++NumRemoved; Changed = true; // The old value may be dead now. DeadInsts.push_back(Op); } - - // Clear the rewriter cache, because values that are in the rewriter's cache - // can be deleted in the loop below, causing the AssertingVH in the cache to - // trigger. - Rewriter.clear(); - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } /// If there's a single exit block, sink any loop-invariant values that @@ -859,7 +900,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { BinaryOperator *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; - + // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); @@ -884,7 +925,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (Compare == 0 || !Compare->hasOneUse() || !isa<BranchInst>(Compare->use_back())) return; - + BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); // We need to verify that the branch actually controls the iteration count @@ -896,8 +937,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { (L->contains(TheBr->getSuccessor(0)) && L->contains(TheBr->getSuccessor(1)))) return; - - + + // If it isn't a comparison with an integer-as-fp (the exit value), we can't // transform it. ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); @@ -905,7 +946,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (ExitValueVal == 0 || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; - + // Find new predicate for integer comparison. 
CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; switch (Compare->getPredicate()) { @@ -923,13 +964,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { case CmpInst::FCMP_OLE: case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; } - + // We convert the floating point induction variable to a signed i32 value if // we can. This is only safe if the comparison will not overflow in a way // that won't be trapped by the integer equivalent operations. Check for this // now. // TODO: We could use i64 if it is native and the range requires it. - + // The start/stride/exit values must all fit in signed i32. if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) return; @@ -945,59 +986,59 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) return; - + uint32_t Range = uint32_t(ExitValue-InitValue); if (NewPred == CmpInst::ICMP_SLE) { // Normalize SLE -> SLT, check for infinite loop. if (++Range == 0) return; // Range overflows. } - + unsigned Leftover = Range % uint32_t(IncValue); - + // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; - + // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) return; - + } else { // If we have a negative stride, we require the init to be greater than the // exit value and an equality or greater than comparison. if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) return; - + uint32_t Range = uint32_t(InitValue-ExitValue); if (NewPred == CmpInst::ICMP_SGE) { // Normalize SGE -> SGT, check for infinite loop. if (++Range == 0) return; // Range overflows. } - + unsigned Leftover = Range % uint32_t(-IncValue); - + // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; - + // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) return; } - + const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); // Insert new integer induction variable. 
- PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN); + PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), PN->getIncomingBlock(IncomingEdge)); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 90094a8..7168177 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -16,6 +16,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" @@ -170,9 +171,9 @@ bool JumpThreading::runOnFunction(Function &F) { Changed = true; continue; } - + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); - + // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. @@ -608,7 +609,7 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { static bool hasAddressTakenAndUsed(BasicBlock *BB) { if (!BB->hasAddressTaken()) return false; - + // If the block has its address taken, it may be a tree of dead constants // hanging off of it. These shouldn't keep the block alive. BlockAddress *BA = BlockAddress::get(BB); @@ -668,6 +669,17 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { return false; // Must be an invoke. } + // Run constant folding to see if we can reduce the condition to a simple + // constant. + if (Instruction *I = dyn_cast<Instruction>(Condition)) { + Value *SimpleVal = ConstantFoldInstruction(I, TD); + if (SimpleVal) { + I->replaceAllUsesWith(SimpleVal); + I->eraseFromParent(); + Condition = SimpleVal; + } + } + // If the terminator is branching on an undef, we can pick any of the // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa<UndefValue>(Condition)) { @@ -928,13 +940,14 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { array_pod_sort(AvailablePreds.begin(), AvailablePreds.end()); // Create a PHI node at the start of the block for the PRE'd load value. - PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin()); + pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB); + PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "", + LoadBB->begin()); PN->takeName(LI); // Insert new entries into the PHI for each predecessor. A single block may // have multiple entries here. - for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; - ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 0786793..93de9cf 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -445,7 +445,8 @@ void LICM::sink(Instruction &I) { // enough that we handle it as a special (more efficient) case. It is more // efficient to handle because there are no PHI nodes that need to be placed. if (ExitBlocks.size() == 1) { - if (!DT->dominates(I.getParent(), ExitBlocks[0])) { + if (!isa<DbgInfoIntrinsic>(I) && + !DT->dominates(I.getParent(), ExitBlocks[0])) { // Instruction is not used, just delete it. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. 
@@ -742,30 +743,13 @@ void LICM::PromoteAliasSet(AliasSet &AS) { Preheader->getTerminator()); SSA.AddAvailableValue(Preheader, PreheaderLoad); - // Copy any value stored to or loaded from a must-alias of the pointer. - if (PreheaderLoad->getType()->isPointerTy()) { - Value *SomeValue; - if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0])) - SomeValue = LI; - else - SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand(); - - CurAST->copyValue(SomeValue, PreheaderLoad); - } - // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); - - // If the preheader load is itself a pointer, we need to tell alias analysis - // about the new pointer we created in the preheader block and about any PHI - // nodes that just got inserted. - if (PreheaderLoad->getType()->isPointerTy()) { - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) - CurAST->copyValue(PreheaderLoad, NewPHIs[i]); - } - - // fwew, we're done! + + // If the SSAUpdater didn't use the load in the preheader, just zap it now. + if (PreheaderLoad->use_empty()) + PreheaderLoad->eraseFromParent(); } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index f8ce214..1366231 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -81,7 +81,7 @@ namespace { bool processLoopStore(StoreInst *SI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); - + bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *SplatValue, Instruction *TheStore, @@ -91,7 +91,7 @@ namespace { const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, const SCEV *BECount); - + /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. /// @@ -134,50 +134,50 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } /// static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { SmallVector<Instruction*, 32> NowDeadInsts; - + NowDeadInsts.push_back(I); - + // Before we touch this instruction, remove it from SE! do { Instruction *DeadInst = NowDeadInsts.pop_back_val(); - + // This instruction is dead, zap it, in stages. Start by removing it from // SCEV. SE.forgetValue(DeadInst); - + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); DeadInst->setOperand(op, 0); - + // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; - + if (Instruction *OpI = dyn_cast<Instruction>(Op)) if (isInstructionTriviallyDead(OpI)) NowDeadInsts.push_back(OpI); } - + DeadInst->eraseFromParent(); - + } while (!NowDeadInsts.empty()); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; - + // The trip count of the loop must be analyzable. SE = &getAnalysis<ScalarEvolution>(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) return false; const SCEV *BECount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BECount)) return false; - + // If this loop executes exactly one time, then it should be peeled, not // optimized by this pass. if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) if (BECst->getValue()->getValue() == 0) return false; - + // We require target data for now. 
TD = getAnalysisIfAvailable<TargetData>(); if (TD == 0) return false; @@ -185,14 +185,14 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { DT = &getAnalysis<DominatorTree>(); LoopInfo &LI = getAnalysis<LoopInfo>(); TLI = &getAnalysis<TargetLibraryInfo>(); - + SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); DEBUG(dbgs() << "loop-idiom Scanning: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); - + bool MadeChange = false; // Scan all the blocks in the loop that are not in subloops. for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; @@ -200,7 +200,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { // Ignore blocks in subloops. if (LI.getLoopFor(*BI) != CurLoop) continue; - + MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks); } return MadeChange; @@ -217,7 +217,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) if (!DT->dominates(BB, ExitBlocks[i])) return false; - + bool MadeChange = false; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = I++; @@ -226,20 +226,20 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, WeakVH InstPtr(I); if (!processLoopStore(SI, BECount)) continue; MadeChange = true; - + // If processing the store invalidated our iterator, start over from the // top of the block. if (InstPtr == 0) I = BB->begin(); continue; } - + // Look for memset instructions, which may be optimized to a larger memset. if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { WeakVH InstPtr(I); if (!processLoopMemSet(MSI, BECount)) continue; MadeChange = true; - + // If processing the memset invalidated our iterator, start over from the // top of the block. if (InstPtr == 0) @@ -247,7 +247,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, continue; } } - + return MadeChange; } @@ -258,12 +258,12 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); - + // Reject stores that are so large that they overflow an unsigned. uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType()); if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) return false; - + // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. @@ -274,9 +274,9 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. - unsigned StoreSize = (unsigned)SizeInBits >> 3; + unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); - + if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { // TODO: Could also handle negative stride here someday, that will require // the validity check in mayLoopAccessLocation to be updated though. 
@@ -285,7 +285,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; dbgs() << "BB: " << *SI->getParent(); } - + return false; } @@ -319,9 +319,9 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // If we're not allowed to hack on memset, we fail. if (!TLI->has(LibFunc::memset)) return false; - + Value *Pointer = MSI->getDest(); - + // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. @@ -333,16 +333,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); if ((SizeInBytes >> 32) != 0) return false; - + // Check to see if the stride matches the size of the memset. If so, then we // know that every byte is touched in the loop. const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); - + // TODO: Could also handle negative stride here someday, that will require the // validity check in mayLoopAccessLocation to be updated though. if (Stride == 0 || MSI->getLength() != Stride->getValue()) return false; - + return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, MSI->getAlignment(), MSI->getValue(), MSI, Ev, BECount); @@ -365,7 +365,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, // to be exactly the size of the memset, which is (BECount+1)*StoreSize if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; - + // TODO: For this to be really effective, we have to dive into the pointer // operand in the store. Store to &A[i] of 100 will always return may alias // with store of &A[100], we need to StoreLoc to be "A" with size of 100, @@ -394,12 +394,12 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { // that doesn't seem worthwhile. Constant *C = dyn_cast<Constant>(V); if (C == 0) return 0; - + // Only handle simple values that are a power of two bytes in size. uint64_t Size = TD.getTypeSizeInBits(V->getType()); if (Size == 0 || (Size & 7) || (Size & (Size-1))) return 0; - + // Don't care enough about darwin/ppc to implement this. if (TD.isBigEndian()) return 0; @@ -410,7 +410,7 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see // if the top and bottom are the same (e.g. for vectors and large integers). if (Size > 16) return 0; - + // If the constant is exactly 16 bytes, just use it. if (Size == 16) return C; @@ -428,14 +428,14 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount) { - + // If the stored value is a byte-wise value (like i32 -1), then it may be // turned into a memset of i8 -1, assuming that all the consecutive bytes // are stored. A store of i32 0x01020304 can never be turned into a memset, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = 0; - + // If we're allowed to form a memset, and the stored value would be acceptable // for memset, use it. 
if (SplatValue && TLI->has(LibFunc::memset) && @@ -453,8 +453,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // do anything with a 3-byte store, for example. return false; } - - + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -463,47 +463,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) return false; - + // Okay, everything looks good, insert the memset. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - + IRBuilder<> Builder(Preheader->getTerminator()); - + // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. Just insert code for it in the preheader. SCEVExpander Expander(*SE); - + unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); - Value *BasePtr = + Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - true /*no unsigned overflow*/); + SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - true /*no unsigned overflow*/); - - Value *NumBytes = + SCEV::FlagNUW); + + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - - Value *NewCall; + + CallInst *NewCall; if (SplatValue) NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment); else { Module *M = TheStore->getParent()->getParent()->getParent(); Value *MSP = M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), - Builder.getInt8PtrTy(), + Builder.getInt8PtrTy(), Builder.getInt8PtrTy(), IntPtr, (void*)0); - + // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, @@ -514,11 +514,11 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy()); NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); } - + DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" << " from store to: " << *Ev << " at: " << *TheStore << "\n"); - (void)NewCall; - + NewCall->setDebugLoc(TheStore->getDebugLoc()); + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. DeleteDeadInstruction(TheStore, *SE); @@ -536,9 +536,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // If we're not allowed to form memcpy, we fail. if (!TLI->has(LibFunc::memcpy)) return false; - + LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); - + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. 
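The memset formation above computes the byte count as (BECount + 1) * StoreSize in SCEV form, and the old true /*no unsigned overflow*/ arguments become the explicit SCEV::FlagNUW enumerator (the memcpy path and LSR below make the same substitution). A compact sketch of that computation, names as in the patch:

    // BECount is the backedge-taken count; StoreSize is the store width in bytes.
    const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
    BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
    const SCEV *NumBytesS =
      SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), SCEV::FlagNUW);
    if (StoreSize != 1)
      NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                                 SCEV::FlagNUW);
    Value *NumBytes =
      Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());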
However, this // would be unsafe to do if there is anything else in the loop that may read @@ -555,49 +555,49 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), SI)) return false; - + // Okay, everything looks good, insert the memcpy. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - + IRBuilder<> Builder(Preheader->getTerminator()); - + // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. Just insert code for it in the preheader. SCEVExpander Expander(*SE); - Value *LoadBasePtr = + Value *LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()), Preheader->getTerminator()); - Value *StoreBasePtr = + Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()), Preheader->getTerminator()); - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(SI->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - true /*no unsigned overflow*/); + SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - true /*no unsigned overflow*/); - + SCEV::FlagNUW); + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - + Value *NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, std::min(SI->getAlignment(), LI->getAlignment())); - + DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); (void)NewCall; - + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. DeleteDeadInstruction(SI, *SE); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 95e1578..47dced3 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -184,7 +184,11 @@ bool LoopRotate::rotateLoop(Loop *L) { // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); BasicBlock *OrigLatch = L->getLoopLatch(); - assert(OrigPreheader && OrigLatch && "Loop not in canonical form?"); + + // If the loop could not be converted to canonical form, it must have an + // indirectbr in it, just give up. + if (OrigPreheader == 0 || OrigLatch == 0) + return false; // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. @@ -322,7 +326,8 @@ bool LoopRotate::rotateLoop(Loop *L) { // We can fold the conditional branch in the preheader, this makes things // simpler. The first step is to remove the extra edge to the Exit block. Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); - BranchInst::Create(NewHeader, PHBI); + BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); + NewBI->setDebugLoc(PHBI->getDebugLoc()); PHBI->eraseFromParent(); // With our CFG finalized, update DomTree if it is available. 
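The LoopRotation hunk above is one of several in this patch (Reassociate later does the same) that copy the debug location from a replaced instruction onto its replacement so line information survives the rewrite. The pattern, sketched with hypothetical OldI/NewV names:

    OldI->replaceAllUsesWith(NewV);
    if (Instruction *NewI = dyn_cast<Instruction>(NewV))
      NewI->setDebugLoc(OldI->getDebugLoc());   // keep the source location
    OldI->eraseFromParent();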
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index ac4aea2..5abc790 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -253,7 +253,8 @@ static void DoInitialMatch(const SCEV *S, Loop *L, DoInitialMatch(AR->getStart(), L, Good, Bad, SE); DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), + // FIXME: AR->getNoWrapFlags() + AR->getLoop(), SCEV::FlagAnyWrap), L, Good, Bad, SE); return; } @@ -455,7 +456,10 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, IgnoreSignificantBits); if (!Start) return 0; - return SE.getAddRecExpr(Start, Step, AR->getLoop()); + // FlagNW is independent of the start value, step direction, and is + // preserved with smaller magnitude steps. + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); } return 0; } @@ -520,7 +524,9 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); if (Result != 0) - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -545,7 +551,9 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); if (Result) - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -564,9 +572,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::prefetch: - case Intrinsic::x86_sse2_loadu_dq: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse_loadu_ps: case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: @@ -781,7 +786,7 @@ void Cost::RateFormula(const Formula &F, } } -/// Loose - Set this cost to a loosing value. +/// Loose - Set this cost to a losing value. void Cost::Loose() { NumRegs = ~0u; AddRecCost = ~0u; @@ -1483,7 +1488,7 @@ void LSRInstance::OptimizeShadowIV() { if (!C->getValue().isStrictlyPositive()) continue; /* Add new PHINode. */ - PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH); /* create new increment. '++d' in above example. */ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); @@ -1819,7 +1824,7 @@ LSRInstance::OptimizeLoopTermCond() { } } -/// reconcileNewOffset - Determine if the given use can accomodate a fixup +/// reconcileNewOffset - Determine if the given use can accommodate a fixup /// at the given offset and other details. If so, update the use and /// return true. 
bool @@ -2236,7 +2241,9 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, if (!AR->getStart()->isZero()) { CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), + AR->getLoop(), + //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap), C, Ops, L, SE); CollectSubexprs(AR->getStart(), C, Ops, L, SE); return; @@ -3047,7 +3054,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { } } -/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call /// FilterOutUndesirableDedicatedRegisters again, if necessary, now that /// we've done more filtering, as it may be able to find more formulae to /// eliminate. diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 80b263a..fef6bc3 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -43,7 +43,13 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(ID) { + LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) { + CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); + CurrentCount = (C == -1) ? UnrollCount : unsigned(C); + CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; + + UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -56,7 +62,10 @@ namespace { // explicit -unroll-threshold). static const unsigned OptSizeUnrollThreshold = 50; + unsigned CurrentCount; unsigned CurrentThreshold; + bool CurrentAllowPartial; + bool UserThreshold; // CurrentThreshold is user-specified. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -87,7 +96,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) -Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } +Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { + return new LoopUnroll(Threshold, Count, AllowPartial); +} /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { @@ -119,14 +130,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // from UnrollThreshold, it is overridden to a smaller value if the current // function is marked as optimize-for-size, and the unroll threshold was // not user specified. - CurrentThreshold = UnrollThreshold; - if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) && - UnrollThreshold.getNumOccurrences() == 0) - CurrentThreshold = OptSizeUnrollThreshold; + unsigned Threshold = CurrentThreshold; + if (!UserThreshold && + Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + Threshold = OptSizeUnrollThreshold; // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); - unsigned Count = UnrollCount; + unsigned Count = CurrentCount; // Automatically select an unroll count. if (Count == 0) { @@ -140,7 +151,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Enforce the threshold. 
- if (CurrentThreshold != NoThreshold) { + if (Threshold != NoThreshold) { unsigned NumInlineCandidates; unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); @@ -149,16 +160,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > CurrentThreshold) { + if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << CurrentThreshold << "\n"); - if (!UnrollAllowPartial) { + << " because size: " << Size << ">" << Threshold << "\n"); + if (!CurrentAllowPartial) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = CurrentThreshold / LoopSize; + Count = Threshold / LoopSize; while (Count != 0 && TripCount%Count != 0) { Count--; } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index bde0e53..a3035cb 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <list> using namespace llvm; @@ -299,12 +300,15 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, namespace { class MemCpyOpt : public FunctionPass { MemoryDependenceAnalysis *MD; + TargetLibraryInfo *TLI; const TargetData *TD; public: static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); MD = 0; + TLI = 0; + TD = 0; } bool runOnFunction(Function &F); @@ -316,6 +320,7 @@ namespace { AU.addRequired<DominatorTree>(); AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<MemoryDependenceAnalysis>(); } @@ -346,6 +351,7 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) @@ -688,7 +694,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, if (M->getSource() == MDep->getSource()) return false; - // Second, the length of the memcpy's must be the same, or the preceeding one + // Second, the length of the memcpy's must be the same, or the preceding one // must be larger than the following one. ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); @@ -804,6 +810,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { bool MemCpyOpt::processMemMove(MemMoveInst *M) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + if (!TLI->has(LibFunc::memmove)) + return false; + // See if the pointers alias. 
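The LoopUnroll changes above let the threshold, count, and partial-unroll permission be supplied when the pass is constructed, falling back to the existing cl::opt flags when a parameter is -1. A hedged usage sketch for a C++-built pipeline; the numeric values are illustrative, and the no-argument form assumes the header declares -1 defaults, which the constructor fallback suggests:

    PassManager PM;
    // Explicit parameters override -unroll-threshold/-unroll-count/-unroll-allow-partial.
    PM.add(createLoopUnrollPass(/*Threshold=*/150, /*Count=*/4, /*AllowPartial=*/1));
    // Or keep the command-line defaults (assuming -1 defaults in the declaration):
    PM.add(createLoopUnrollPass());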
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) return false; @@ -935,6 +944,14 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis<MemoryDependenceAnalysis>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + + // If we don't have at least memset and memcpy, there is little point of doing + // anything here. These are required by a freestanding implementation, so if + // even they are disabled, there is no point in trying hard. + if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) + return false; + while (1) { if (!iterateOnFunction(F)) break; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index e093b52..c1dfe15 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -74,6 +75,8 @@ namespace { class Reassociate : public FunctionPass { DenseMap<BasicBlock*, unsigned> RankMap; DenseMap<AssertingVH<>, unsigned> ValueRankMap; + SmallVector<WeakVH, 8> RedoInsts; + SmallVector<WeakVH, 8> DeadInsts; bool MadeChange; public: static char ID; // Pass identification, replacement for typeid @@ -98,7 +101,7 @@ namespace { void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); void LinearizeExpr(BinaryOperator *I); Value *RemoveFactorFromExpression(Value *V, Value *Factor); - void ReassociateBB(BasicBlock *BB); + void ReassociateInst(BasicBlock::iterator &BBI); void RemoveDeadBinaryOp(Value *V); }; @@ -113,13 +116,13 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } void Reassociate::RemoveDeadBinaryOp(Value *V) { Instruction *Op = dyn_cast<Instruction>(V); - if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty()) + if (!Op || !isa<BinaryOperator>(Op)) return; Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); ValueRankMap.erase(Op); - Op->eraseFromParent(); + DeadInsts.push_back(Op); RemoveDeadBinaryOp(LHS); RemoveDeadBinaryOp(RHS); } @@ -214,6 +217,7 @@ static Instruction *LowerNegateToMultiply(Instruction *Neg, ValueRankMap.erase(Neg); Res->takeName(Neg); Neg->replaceAllUsesWith(Res); + Res->setDebugLoc(Neg->getDebugLoc()); Neg->eraseFromParent(); return Res; } @@ -503,6 +507,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub, // Everyone now refers to the add instruction. ValueRankMap.erase(Sub); Sub->replaceAllUsesWith(New); + New->setDebugLoc(Sub->getDebugLoc()); Sub->eraseFromParent(); DEBUG(dbgs() << "Negated: " << *New << '\n'); @@ -528,6 +533,7 @@ static Instruction *ConvertShiftToMul(Instruction *Shl, ValueRankMap.erase(Shl); Mul->takeName(Shl); Shl->replaceAllUsesWith(Mul); + Mul->setDebugLoc(Shl->getDebugLoc()); Shl->eraseFromParent(); return Mul; } @@ -603,7 +609,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { // remaining operand. if (Factors.size() == 1) { ValueRankMap.erase(BO); - BO->eraseFromParent(); + DeadInsts.push_back(BO); V = Factors[0].Op; } else { RewriteExprTree(BO, Factors); @@ -732,7 +738,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Now that we have inserted a multiply, optimize it. 
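MemCpyOpt now requires TargetLibraryInfo and bails out early when the target provides neither memset nor memcpy; individual transforms additionally check the specific routine they would emit (processMemMove checks memmove above). The gating pattern, sketched inside the pass class with unrelated code elided:

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<TargetLibraryInfo>();
      // ... existing requirements ...
    }

    bool runOnFunction(Function &F) {
      TLI = &getAnalysis<TargetLibraryInfo>();
      // Both calls are required even of freestanding implementations; if they
      // are unavailable, nothing this pass forms would be legal to emit.
      if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
        return false;
      // ... rest of the pass ...
    }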
This allows us to // handle cases that require multiple factoring steps, such as this: // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 - Mul = ReassociateExpression(cast<BinaryOperator>(Mul)); + RedoInsts.push_back(Mul); // If every add operand was a duplicate, return the multiply. if (Ops.empty()) @@ -960,71 +966,69 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, } -/// ReassociateBB - Inspect all of the instructions in this basic block, -/// reassociating them as we go. -void Reassociate::ReassociateBB(BasicBlock *BB) { - for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) { - Instruction *BI = BBI++; - if (BI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(BI->getOperand(1))) - if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { - MadeChange = true; - BI = NI; - } +/// ReassociateInst - Inspect and reassociate the instruction at the +/// given position, post-incrementing the position. +void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { + Instruction *BI = BBI++; + if (BI->getOpcode() == Instruction::Shl && + isa<ConstantInt>(BI->getOperand(1))) + if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { + MadeChange = true; + BI = NI; + } - // Reject cases where it is pointless to do this. - if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || - BI->getType()->isVectorTy()) - continue; // Floating point ops are not associative. - - // Do not reassociate boolean (i1) expressions. We want to preserve the - // original order of evaluation for short-circuited comparisons that - // SimplifyCFG has folded to AND/OR expressions. If the expression - // is not further optimized, it is likely to be transformed back to a - // short-circuited form for code gen, and the source order may have been - // optimized for the most likely conditions. - if (BI->getType()->isIntegerTy(1)) - continue; + // Reject cases where it is pointless to do this. + if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || + BI->getType()->isVectorTy()) + return; // Floating point ops are not associative. + + // Do not reassociate boolean (i1) expressions. We want to preserve the + // original order of evaluation for short-circuited comparisons that + // SimplifyCFG has folded to AND/OR expressions. If the expression + // is not further optimized, it is likely to be transformed back to a + // short-circuited form for code gen, and the source order may have been + // optimized for the most likely conditions. + if (BI->getType()->isIntegerTy(1)) + return; - // If this is a subtract instruction which is not already in negate form, - // see if we can convert it to X+-Y. - if (BI->getOpcode() == Instruction::Sub) { - if (ShouldBreakUpSubtract(BI)) { - BI = BreakUpSubtract(BI, ValueRankMap); - // Reset the BBI iterator in case BreakUpSubtract changed the - // instruction it points to. - BBI = BI; - ++BBI; + // If this is a subtract instruction which is not already in negate form, + // see if we can convert it to X+-Y. + if (BI->getOpcode() == Instruction::Sub) { + if (ShouldBreakUpSubtract(BI)) { + BI = BreakUpSubtract(BI, ValueRankMap); + // Reset the BBI iterator in case BreakUpSubtract changed the + // instruction it points to. + BBI = BI; + ++BBI; + MadeChange = true; + } else if (BinaryOperator::isNeg(BI)) { + // Otherwise, this is a negation. See if the operand is a multiply tree + // and if this is not an inner node of a multiply tree. 
+ if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && + (!BI->hasOneUse() || + !isReassociableOp(BI->use_back(), Instruction::Mul))) { + BI = LowerNegateToMultiply(BI, ValueRankMap); MadeChange = true; - } else if (BinaryOperator::isNeg(BI)) { - // Otherwise, this is a negation. See if the operand is a multiply tree - // and if this is not an inner node of a multiply tree. - if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && - (!BI->hasOneUse() || - !isReassociableOp(BI->use_back(), Instruction::Mul))) { - BI = LowerNegateToMultiply(BI, ValueRankMap); - MadeChange = true; - } } } + } - // If this instruction is a commutative binary operator, process it. - if (!BI->isAssociative()) continue; - BinaryOperator *I = cast<BinaryOperator>(BI); + // If this instruction is a commutative binary operator, process it. + if (!BI->isAssociative()) return; + BinaryOperator *I = cast<BinaryOperator>(BI); - // If this is an interior node of a reassociable tree, ignore it until we - // get to the root of the tree, to avoid N^2 analysis. - if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) - continue; + // If this is an interior node of a reassociable tree, ignore it until we + // get to the root of the tree, to avoid N^2 analysis. + if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) + return; - // If this is an add tree that is used by a sub instruction, ignore it - // until we process the subtract. - if (I->hasOneUse() && I->getOpcode() == Instruction::Add && - cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) - continue; + // If this is an add tree that is used by a sub instruction, ignore it + // until we process the subtract. + if (I->hasOneUse() && I->getOpcode() == Instruction::Add && + cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) + return; - ReassociateExpression(I); - } + ReassociateExpression(I); } Value *Reassociate::ReassociateExpression(BinaryOperator *I) { @@ -1051,6 +1055,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // eliminate it. DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); I->replaceAllUsesWith(V); + if (Instruction *VI = dyn_cast<Instruction>(V)) + VI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); ++NumAnnihil; return V; @@ -1074,6 +1080,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // This expression tree simplified to something that isn't a tree, // eliminate it. I->replaceAllUsesWith(Ops[0].Op); + if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op)) + OI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); return Ops[0].Op; } @@ -1091,7 +1099,21 @@ bool Reassociate::runOnFunction(Function &F) { MadeChange = false; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - ReassociateBB(FI); + for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ) + ReassociateInst(BBI); + + // Now that we're done, revisit any instructions which are likely to + // have secondary reassociation opportunities. + while (!RedoInsts.empty()) + if (Value *V = RedoInsts.pop_back_val()) { + BasicBlock::iterator BBI = cast<Instruction>(V); + ReassociateInst(BBI); + } + + // Now that we're done, delete any instructions which are no longer used. + while (!DeadInsts.empty()) + if (Value *V = DeadInsts.pop_back_val()) + RecursivelyDeleteTriviallyDeadInstructions(V); // We are done with the rank map. 
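Reassociate, like IndVarSimplify above, moves from eager erasure to two WeakVH worklists: RedoInsts for instructions that may expose further reassociation once processed, and DeadInsts for operands that only become trivially dead at the end. Sketch of the new tail of runOnFunction, names as in the patch:

    // Entries deleted in the meantime read back as null and are skipped.
    while (!RedoInsts.empty())
      if (Value *V = RedoInsts.pop_back_val()) {
        BasicBlock::iterator BBI = cast<Instruction>(V);
        ReassociateInst(BBI);
      }
    while (!DeadInsts.empty())
      if (Value *V = DeadInsts.pop_back_val())
        RecursivelyDeleteTriviallyDeadInstructions(V);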
RankMap.clear(); diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 459bb06..47afc77 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -9,7 +9,7 @@ // // This file demotes all registers to memory references. It is intented to be // the inverse of PromoteMemoryToRegister. By converting to loads, the only -// values live accross basic blocks are allocas and loads before phi nodes. +// values live across basic blocks are allocas and loads before phi nodes. // It is intended that this should make CFG hacking much easier. // To make later hacking easier, the entry block is split into two, such that // all introduced allocas and nothing else are in the entry block. diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index c82e929..db8eb85 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1989,7 +1989,7 @@ bool IPSCCP::runOnModule(Module &M) { ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); } - // If we infered constant or undef values for globals variables, we can delete + // If we inferred constant or undef values for globals variables, we can delete // the global and any stores that remain to it. const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals(); for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(), diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index bf9ca6d..32a0506 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -17,6 +17,7 @@ #include "llvm-c/Initialization.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" @@ -34,7 +35,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeDCEPass(Registry); initializeDeadInstEliminationPass(Registry); initializeDSEPass(Registry); - initializeGEPSplitterPass(Registry); initializeGVNPass(Registry); initializeEarlyCSEPass(Registry); initializeIndVarSimplifyPass(Registry); @@ -56,7 +56,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSROA_DTPass(Registry); initializeSROA_SSAUpPass(Registry); initializeCFGSimplifyPassPass(Registry); - initializeSimplifyHalfPowrLibCallsPass(Registry); initializeSimplifyLibCallsPass(Registry); initializeSinkingPass(Registry); initializeTailDupPass(Registry); @@ -103,6 +102,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopDeletionPass()); } +void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopIdiomPass()); +} + void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopRotatePass()); } @@ -135,6 +138,10 @@ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createScalarReplAggregatesPass()); } +void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) { + unwrap(PM)->add(createScalarReplAggregatesPass(-1, false)); +} + void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM, int Threshold) { unwrap(PM)->add(createScalarReplAggregatesPass(Threshold)); @@ -159,3 +166,19 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { void LLVMAddVerifierPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createVerifierPass()); } + +void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) { + 
unwrap(PM)->add(createCorrelatedValuePropagationPass()); +} + +void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createEarlyCSEPass()); +} + +void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createTypeBasedAliasAnalysisPass()); +} + +void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createBasicAliasAnalysisPass()); +} diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index c3ca852..8178c27 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -219,7 +219,7 @@ namespace { /// optimization, which scans the uses of an alloca and determines if it can /// rewrite it in terms of a single new alloca that can be mem2reg'd. class ConvertToScalarInfo { - /// AllocaSize - The size of the alloca being considered. + /// AllocaSize - The size of the alloca being considered in bytes. unsigned AllocaSize; const TargetData &TD; @@ -238,19 +238,22 @@ class ConvertToScalarInfo { /// also declared as a vector, we do want to promote to a vector. bool HadAVector; + /// HadNonMemTransferAccess - True if there is at least one access to the + /// alloca that is not a MemTransferInst. We don't want to turn structs into + /// large integers unless there is some potential for optimization. + bool HadNonMemTransferAccess; + public: explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) - : AllocaSize(Size), TD(td) { - IsNotTrivial = false; - VectorTy = 0; - HadAVector = false; - } + : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0), + HadAVector(false), HadNonMemTransferAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInType(const Type *In, uint64_t Offset); + void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore); + bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, @@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. } else { + unsigned BitWidth = AllocaSize * 8; + if (!HadAVector && !HadNonMemTransferAccess && + !TD.fitsInLegalInteger(BitWidth)) + return 0; + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); + NewTy = IntegerType::get(AI->getContext(), BitWidth); } AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); @@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) /// so far at the offset specified by Offset (which is specified in bytes). /// -/// There are two cases we handle here: +/// There are three cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. /// Here we turn element accesses into insert/extract element operations. /// This promotes a <4 x float> with a store of float to the third element /// into a <4 x float> that uses insert element. -/// 2) A fully general blob of memory, which we turn into some (potentially +/// 2) A union of vector types with power-of-2 size differences, e.g. 
a float, +/// <2 x float> and <4 x float>. Here we turn element accesses into insert +/// and extract element operations, and <2 x float> accesses into a cast to +/// <2 x double>, an extract, and a cast back to <2 x float>. +/// 3) A fully general blob of memory, which we turn into some (potentially /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { +void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, + bool IsLoadOrStore) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. if (VectorTy && VectorTy->isVoidTy()) @@ -314,33 +327,33 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { - // Remember if we saw a vector type. - HadAVector = true; - - if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { - // If we're storing/loading a vector of the right size, allow it as a - // vector. If this the first vector we see, remember the type so that - // we know the element size. If this is a subsequent access, ignore it - // even if it is a differing type but the same size. Worst case we can - // bitcast the resultant vectors. - if (VectorTy == 0) - VectorTy = VInTy; + if (MergeInVectorType(VInTy, Offset)) return; - } } else if (In->isFloatTy() || In->isDoubleTy() || (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { + // Full width accesses can be ignored, because they can always be turned + // into bitcasts. + unsigned EltSize = In->getPrimitiveSizeInBits()/8; + if (IsLoadOrStore && EltSize == AllocaSize) + return; + // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is // compatible with it. - unsigned EltSize = In->getPrimitiveSizeInBits()/8; - if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (VectorTy == 0 || - cast<VectorType>(VectorTy)->getElementType() - ->getPrimitiveSizeInBits()/8 == EltSize)) { - if (VectorTy == 0) + if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) { + if (!VectorTy) { VectorTy = VectorType::get(In, AllocaSize/EltSize); - return; + return; + } + + unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType() + ->getPrimitiveSizeInBits()/8; + if (EltSize == CurrentEltSize) + return; + + if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize)) + return; } } @@ -349,6 +362,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { VectorTy = Type::getVoidTy(In->getContext()); } +/// MergeInVectorType - Handles the vector case of MergeInType, returning true +/// if the type was successfully merged and false otherwise. +bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy, + uint64_t Offset) { + // Remember if we saw a vector type. + HadAVector = true; + + // TODO: Support nonzero offsets? + if (Offset != 0) + return false; + + // Only allow vectors that are a power-of-2 away from the size of the alloca. + if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8))) + return false; + + // If this the first vector we see, remember the type so that we know the + // element size. 
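The three-way case analysis above is easier to see with a concrete access pattern. The following standalone sketch (illustrative only, written with the GCC/Clang vector extension; none of these names appear in the patch) produces exactly the float / <2 x float> / <4 x float> mixture that case 2 describes, which SROA can now keep as a single <4 x float> and lower the narrower accesses to element extracts or subvector casts.

typedef float v4sf __attribute__((vector_size(16)));
typedef float v2sf __attribute__((vector_size(8)));

union Slot {
  float f;     // scalar element access
  v2sf  half;  // half-width vector access
  v4sf  full;  // full-width vector access
};

// Type-punning through a union is the usual GNU-extension idiom; it is used
// here only to produce the mixed-width accesses of one 16-byte slot.
float firstLane(v4sf v) {
  Slot s;
  s.full = v;   // full-width store
  return s.f;   // scalar read of element 0: a power-of-2 size difference
}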
+ if (!VectorTy) { + VectorTy = VInTy; + return true; + } + + unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth(); + unsigned InBitWidth = VInTy->getBitWidth(); + + // Vectors of the same size can be converted using a simple bitcast. + if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) + return true; + + const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType(); + const Type *InElementTy = cast<VectorType>(VInTy)->getElementType(); + + // Do not allow mixed integer and floating-point accesses from vectors of + // different sizes. + if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy()) + return false; + + if (ElementTy->isFloatingPointTy()) { + // Only allow floating-point vectors of different sizes if they have the + // same element type. + // TODO: This could be loosened a bit, but would anything benefit? + if (ElementTy != InElementTy) + return false; + + // There are no arbitrary-precision floating-point types, which limits the + // number of legal vector types with larger element types that we can form + // to bitcast and extract a subvector. + // TODO: We could support some more cases with mixed fp128 and double here. + if (!(BitWidth == 64 || BitWidth == 128) || + !(InBitWidth == 64 || InBitWidth == 128)) + return false; + } else { + assert(ElementTy->isIntegerTy() && "Vector elements must be either integer " + "or floating-point."); + unsigned BitWidth = ElementTy->getPrimitiveSizeInBits(); + unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits(); + + // Do not allow integer types smaller than a byte or types whose widths are + // not a multiple of a byte. + if (BitWidth < 8 || InBitWidth < 8 || + BitWidth % 8 != 0 || InBitWidth % 8 != 0) + return false; + } + + // Pick the largest of the two vector types. + if (InBitWidth > BitWidth) + VectorTy = VInTy; + + return true; +} + /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all /// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable @@ -369,7 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (LI->getType()->isX86_MMXTy()) return false; - MergeInType(LI->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(LI->getType(), Offset, true); continue; } @@ -379,7 +464,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; - MergeInType(SI->getOperand(0)->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(SI->getOperand(0)->getType(), Offset, true); continue; } @@ -403,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -414,6 +501,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { !isa<ConstantInt>(MSI->getLength())) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -575,6 +663,63 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, } } +/// getScaledElementType - Gets a scaled element type for a partial vector +/// access of an alloca. 
The input types must be integer or floating-point +/// scalar or vector types, and the resulting type is an integer, float or +/// double. +static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2, + unsigned NewBitWidth) { + bool IsFP1 = Ty1->isFloatingPointTy() || + (Ty1->isVectorTy() && + cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy()); + bool IsFP2 = Ty2->isFloatingPointTy() || + (Ty2->isVectorTy() && + cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy()); + + LLVMContext &Context = Ty1->getContext(); + + // Prefer floating-point types over integer types, as integer types may have + // been created by earlier scalar replacement. + if (IsFP1 || IsFP2) { + if (NewBitWidth == 32) + return Type::getFloatTy(Context); + if (NewBitWidth == 64) + return Type::getDoubleTy(Context); + } + + return Type::getIntNTy(Context, NewBitWidth); +} + +/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector +/// to another vector of the same element type which has the same allocation +/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>). +static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, + IRBuilder<> &Builder) { + const Type *FromType = FromVal->getType(); + const VectorType *FromVTy = cast<VectorType>(FromType); + const VectorType *ToVTy = cast<VectorType>(ToType); + assert((ToVTy->getElementType() == FromVTy->getElementType()) && + "Vectors must have the same element type"); + Value *UnV = UndefValue::get(FromType); + unsigned numEltsFrom = FromVTy->getNumElements(); + unsigned numEltsTo = ToVTy->getNumElements(); + + SmallVector<Constant*, 3> Args; + const Type* Int32Ty = Builder.getInt32Ty(); + unsigned minNumElts = std::min(numEltsFrom, numEltsTo); + unsigned i; + for (i=0; i != minNumElts; ++i) + Args.push_back(ConstantInt::get(Int32Ty, i)); + + if (i < numEltsTo) { + Constant* UnC = UndefValue::get(Int32Ty); + for (; i != numEltsTo; ++i) + Args.push_back(UnC); + } + Constant *Mask = ConstantVector::get(Args); + return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV"); +} + /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer /// or vector value FromVal, extracting the bits from the offset specified by /// Offset. This returns the value, which is of type ToType. @@ -589,14 +734,46 @@ Value *ConvertToScalarInfo:: ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. - if (FromVal->getType() == ToType && Offset == 0) + const Type *FromType = FromVal->getType(); + if (FromType == ToType && Offset == 0) return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { - if (ToType->isVectorTy()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); + if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) { + unsigned ToTypeSize = TD.getTypeAllocSize(ToType); + if (ToTypeSize == AllocaSize) { + // If the two types have the same primitive size, use a bit cast. + // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. 
+ if (FromType->getPrimitiveSizeInBits() == + ToType->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(FromVal, ToType, "tmp"); + else + return CreateShuffleVectorCast(FromVal, ToType, Builder); + } + + if (isPowerOf2_64(AllocaSize / ToTypeSize)) { + assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " + "of a smaller vector type at a nonzero offset."); + + const Type *CastElementTy = getScaledElementType(FromType, ToType, + ToTypeSize * 8); + unsigned NumCastVectorElements = AllocaSize / ToTypeSize; + + LLVMContext &Context = FromVal->getContext(); + const Type *CastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Extract, ToType, "tmp"); + } // Otherwise it must be an element access. unsigned Elt = 0; @@ -714,21 +891,49 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, // Changing the whole vector with memset or with an access of a different // vector type? - if (ValSize == VecSize) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); + if (ValSize == VecSize) { + // If the two types have the same primitive size, use a bit cast. + // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. + if (VTy->getPrimitiveSizeInBits() == + SV->getType()->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + else + return CreateShuffleVectorCast(SV, VTy, Builder); + } - uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + if (isPowerOf2_64(VecSize / ValSize)) { + assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a " + "value of a smaller vector type at a nonzero offset."); - // Must be an element insertion. - unsigned Elt = Offset/EltSize; + const Type *CastElementTy = getScaledElementType(VTy, SV->getType(), + ValSize); + unsigned NumCastVectorElements = VecSize / ValSize; - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + LLVMContext &Context = SV->getContext(); + const Type *OldCastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp"); - SV = Builder.CreateInsertElement(Old, SV, + Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Insert = + Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Insert, AllocaType, "tmp"); + } + + // Must be an element insertion. + assert(SV->getType() == VTy->getElementType()); + uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + unsigned Elt = Offset/EltSize; + return Builder.CreateInsertElement(Old, SV, ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), "tmp"); - return SV; } // If SV is a first-class aggregate value, insert each value recursively. 
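To make the partial-access path in the hunks above concrete, here is a standalone sketch (not from the patch; all names are illustrative) of reading the low <2 x float> out of a <4 x float>: the pass performs the analogous bitcast to the scaled vector type, an element extract, and a bitcast back, only on the promoted IR value rather than through memory.

#include <cstring>

typedef float v4sf __attribute__((vector_size(16)));
typedef float v2sf __attribute__((vector_size(8)));

// View the value as <2 x double>, take element 0, and reinterpret those
// 8 bytes as <2 x float>.  memcpy is used for the reinterpretations so the
// sketch stays well-defined C++.
v2sf lowHalf(v4sf v) {
  double lane0;                                  // element 0 of the <2 x double> view
  std::memcpy(&lane0, &v, sizeof(lane0));
  v2sf out;
  std::memcpy(&out, &lane0, sizeof(out));
  return out;
}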
@@ -1083,7 +1288,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { } const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); - PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN); + PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), + PN->getName()+".ld", PN); // Get the TBAA tag and alignment to use from one of the loads. It doesn't // matter which one we get and if any differ, it doesn't matter. diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ce5dd73..1137c2b 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -73,7 +73,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { if (UseLLVMTrap) { Function *TrapFn = Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst::Create(TrapFn, "", I); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); } new UnreachableInst(I->getContext(), I); @@ -259,11 +260,12 @@ static bool MergeEmptyReturnBlocks(Function &F) { PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin()); if (RetBlockPHI == 0) { Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0); - RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge", + pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); + RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), + std::distance(PB, PE), "merge", &RetBlock->front()); - for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock); - PI != E; ++PI) + for (pred_iterator PI = PB; PI != PE; ++PI) RetBlockPHI->addIncoming(InVal, *PI); RetBlock->getTerminator()->setOperand(0, RetBlockPHI); } diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp deleted file mode 100644 index 70ff32e..0000000 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ /dev/null @@ -1,160 +0,0 @@ -//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a simple pass that applies an experimental -// transformation on calls to specific functions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "simplify-libcalls-halfpowr" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Target/TargetData.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -using namespace llvm; - -namespace { - /// This pass optimizes well half_powr function calls. 
- /// - class SimplifyHalfPowrLibCalls : public FunctionPass { - const TargetData *TD; - public: - static char ID; // Pass identification - SimplifyHalfPowrLibCalls() : FunctionPass(ID) { - initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - } - - Instruction * - InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, - Instruction *InsertPt); - }; - char SimplifyHalfPowrLibCalls::ID = 0; -} // end anonymous namespace. - -INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr", - "Simplify half_powr library calls", false, false) - -// Public interface to the Simplify HalfPowr LibCalls pass. -FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { - return new SimplifyHalfPowrLibCalls(); -} - -/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging -/// their control flow to better facilitate subsequent optimization. -Instruction * -SimplifyHalfPowrLibCalls:: -InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, - Instruction *InsertPt) { - std::vector<BasicBlock *> Bodies; - BasicBlock *NewBlock = 0; - - for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) { - CallInst *Call = cast<CallInst>(HalfPowrs[i]); - Function *Callee = Call->getCalledFunction(); - - // Minimally sanity-check the CFG of half_powr to ensure that it contains - // the kind of code we expect. If we're running this pass, we have - // reason to believe it will be what we expect. - Function::iterator I = Callee->begin(); - BasicBlock *Prologue = I++; - if (I == Callee->end()) break; - BasicBlock *SubnormalHandling = I++; - if (I == Callee->end()) break; - BasicBlock *Body = I++; - if (I != Callee->end()) break; - if (SubnormalHandling->getSinglePredecessor() != Prologue) - break; - BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator()); - if (!PBI || !PBI->isConditional()) - break; - BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator()); - if (!SNBI || SNBI->isConditional()) - break; - if (!isa<ReturnInst>(Body->getTerminator())) - break; - - Instruction *NextInst = llvm::next(BasicBlock::iterator(Call)); - - // Inline the call, taking care of what code ends up where. - NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); - - InlineFunctionInfo IFI(0, TD); - bool B = InlineFunction(Call, IFI); - assert(B && "half_powr didn't inline?"); - (void)B; - - BasicBlock *NewBody = NewBlock->getSinglePredecessor(); - assert(NewBody); - Bodies.push_back(NewBody); - } - - if (!NewBlock) - return InsertPt; - - // Put the code for all the bodies into one block, to facilitate - // subsequent optimization. - (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this); - for (unsigned i = 0, e = Bodies.size(); i != e; ++i) { - BasicBlock *Body = Bodies[i]; - Instruction *FNP = Body->getFirstNonPHI(); - // Splice the insts from body into NewBlock. - NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(), - FNP, Body->getTerminator()); - } - - return NewBlock->begin(); -} - -/// runOnFunction - Top level algorithm. -/// -bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { - TD = getAnalysisIfAvailable<TargetData>(); - - bool Changed = false; - std::vector<Instruction *> HalfPowrs; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - // Look for calls. 
- bool IsHalfPowr = false; - if (CallInst *CI = dyn_cast<CallInst>(I)) { - // Look for direct calls and calls to non-external functions. - Function *Callee = CI->getCalledFunction(); - if (Callee && Callee->hasExternalLinkage()) { - // Look for calls with well-known names. - if (Callee->getName() == "__half_powrf4") - IsHalfPowr = true; - } - } - if (IsHalfPowr) - HalfPowrs.push_back(I); - // We're looking for sequences of up to three such calls, which we'll - // simplify as a group. - if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) { - I = InlineHalfPowrs(HalfPowrs, I); - E = I->getParent()->end(); - HalfPowrs.clear(); - Changed = true; - } - } - assert(HalfPowrs.empty() && "Block had no terminator!"); - } - - return Changed; -} diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 9f136d4..6247b03 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -49,6 +49,7 @@ class LibCallOptimization { protected: Function *Caller; const TargetData *TD; + const TargetLibraryInfo *TLI; LLVMContext* Context; public: LibCallOptimization() { } @@ -62,9 +63,11 @@ public: virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) =0; - Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) { + Value *OptimizeCall(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); this->TD = TD; + this->TLI = TLI; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); @@ -97,6 +100,15 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { } return true; } + +static bool CallHasFloatingPointArgument(const CallInst *CI) { + for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); + it != e; ++it) { + if ((*it)->getType()->isFloatingPointTy()) + return true; + } + return false; +} /// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality /// comparisons with With. @@ -1075,14 +1087,8 @@ struct ToAsciiOpt : public LibCallOptimization { // 'printf' Optimizations struct PrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require one fixed pointer argument and an integer/void result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || - !(FT->getReturnType()->isIntegerTy() || - FT->getReturnType()->isVoidTy())) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr)) @@ -1138,20 +1144,40 @@ struct PrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. 
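A standalone example (not part of the patch) of a call the new rewrite applies to: no argument is a floating-point value, so on a target whose C library provides iprintf the callee can simply be swapped while the format string and operands stay untouched, which is what the cloned CallInst in the code that follows does. The sprintf and fprintf optimizers below gain the analogous siprintf and fiprintf rewrites.

#include <cstdio>

// All arguments are integers, so the call qualifies for the iprintf rewrite;
// a call such as printf("%f\n", x) would not.
int report(int simplified) {
  return std::printf("simplified %d libcalls\n", simplified);
}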
+ if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *IPrintFFn = + M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(IPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// // 'sprintf' Optimizations struct SPrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed pointer arguments and an integer result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1212,6 +1238,32 @@ struct SPrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed pointer arguments and an integer result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. + if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *SIPrintFFn = + M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// @@ -1278,14 +1330,8 @@ struct FPutsOpt : public LibCallOptimization { // 'fprintf' Optimizations struct FPrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed paramters as pointers and integer result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // All the optimizations depend on the format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1330,6 +1376,32 @@ struct FPrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed paramters as pointers and integer result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no + // floating point arguments. 
+ if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *FIPrintFFn = + M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// @@ -1544,8 +1616,11 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { // Set the builder to the instruction after the call. Builder.SetInsertPoint(BB, I); + // Use debug location of CI for all new instructions. + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + // Try to optimize this call. - Value *Result = LCO->OptimizeCall(CI, TD, Builder); + Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder); if (Result == 0) continue; DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI; diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 5b6bc04..539cc6f 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -36,7 +36,7 @@ // evaluated each time through the tail recursion. Safely keeping allocas // in the entry block requires analysis to proves that the tail-called // function does not read or write the stack object. -// 2. Tail recursion is only performed if the call immediately preceeds the +// 2. Tail recursion is only performed if the call immediately precedes the // return instruction. It's possible that there could be a jump between // the call and the return. // 3. There can be intervening operations between the call and the return that @@ -433,7 +433,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, if (CanMoveAboveCall(BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it - // is an associative and commutative operation that could be tranformed + // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = @@ -496,7 +496,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, Instruction *InsertPos = OldEntry->begin(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { - PHINode *PN = PHINode::Create(I->getType(), + PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(I, NewEntry); @@ -527,8 +527,10 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. + pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); PHINode *AccPN = PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), + std::distance(PB, PE) + 1, "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the @@ -537,8 +539,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // other tail recursions eliminated) the accumulator is not modified. // Because we haven't added the branch in the current block to OldEntry yet, // it will not show up as a predecessor. 
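For orientation, a source-level sketch (illustrative, not from the patch) of the accumulator-recursion case this PHI serves: the multiply that follows the recursive call is carried in an accumulator, and the new PHI reserves one slot per existing predecessor of the old entry block plus one for the initial value.

// Not a true tail call: the multiply happens after the recursive call.
unsigned factorial(unsigned n) {
  if (n <= 1)
    return 1;
  return n * factorial(n - 1);
}

// Roughly what the pass produces: the accumulator starts at the operation's
// identity value (1 for multiplication) and absorbs each pending factor, so
// the recursion becomes a loop.
unsigned factorial_acc(unsigned n) {
  unsigned acc = 1;   // corresponds to AccumulatorRecursionEliminationInitVal
  while (n > 1) {
    acc *= n;
    --n;
  }
  return acc;
}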
- for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry); - PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if (P == &F->getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); @@ -572,7 +573,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. - BranchInst::Create(OldEntry, Ret); + BranchInst *NewBI = BranchInst::Create(OldEntry, Ret); + NewBI->setDebugLoc(CI->getDebugLoc()); + BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. ++NumEliminated; diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index acaea19..c705cc5 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -447,7 +447,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = - PHINode::Create(PN->getType(), PN->getName()+".ph", BI); + PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. @@ -538,3 +538,15 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, UncondBranch->eraseFromParent(); return cast<ReturnInst>(NewRet); } + +/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a +/// given basic block. +DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) { + for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + DebugLoc DL = BI->getDebugLoc(); + if (!DL.isUnknown()) + return DL; + } + return DebugLoc(); +} diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 616b066..caf2aeb 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -56,7 +56,7 @@ char BreakCriticalEdges::ID = 0; INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", "Break critical edges in CFG", false, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; FunctionPass *llvm::createBreakCriticalEdgesPass() { return new BreakCriticalEdges(); @@ -140,7 +140,7 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds, if (VP->getParent() == SplitBB) continue; // Otherwise a new PHI is needed. Create one and populate it. - PHINode *NewPN = PHINode::Create(PN->getType(), "split", + PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split", SplitBB->getTerminator()); for (unsigned i = 0, e = Preds.size(); i != e; ++i) NewPN->addIncoming(V, Preds[i]); diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index e633772..8c133ea 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -104,7 +104,7 @@ namespace { /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. 
void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { - bool HasPredsFromRegion = false; + unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { @@ -116,7 +116,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (BlocksToExtract.count(PN->getIncomingBlock(i))) - HasPredsFromRegion = true; + ++NumPredsFromRegion; else ++NumPredsOutsideRegion; @@ -147,7 +147,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. - if (HasPredsFromRegion) { + if (NumPredsFromRegion) { PHINode *PN = cast<PHINode>(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. @@ -157,14 +157,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { TI->replaceUsesOfWith(OldPred, NewBB); } - // Okay, everthing within the region is now branching to the right block, we + // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. - PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".ce", - NewBB->begin()); + PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, + PN->getName()+".ce", NewBB->begin()); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index c1faf24..7d17909 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -320,7 +320,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // // Note that this only does one level of inlining. For example, if the // instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now -// exists in the instruction stream. Similiarly this will inline a recursive +// exists in the instruction stream. Similarly this will inline a recursive // function by one level. // bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { @@ -624,7 +624,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!TheCall->use_empty()) { - PHI = PHINode::Create(RTy, TheCall->getName(), + PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. 
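A small source-level illustration (not from the patch) of why that PHI reserves Returns.size() operands: every return in the inlined callee becomes one incoming edge into the block after the call site, and each edge supplies one incoming value.

// Callee with two return paths.
static int clampNonNegative(int v) {
  if (v < 0)
    return 0;
  return v;
}

// After inlining, the two returned values (0 and v) merge in a two-operand
// PHI at the start of the block following the call, and this use of the call
// result becomes a use of that PHI.
int shifted(int v) {
  return clampNonNegative(v) + 1;
}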
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index b2e5fa6..b654111 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -207,6 +207,8 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, DomTreeNode *DomNode = DT->getNode(DomBB); + SmallVector<PHINode*, 16> AddedPHIs; + SSAUpdater SSAUpdate; SSAUpdate.Initialize(Inst->getType(), Inst->getName()); @@ -220,9 +222,10 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // If we already inserted something for this BB, don't reprocess it. if (SSAUpdate.HasValueForBlock(ExitBB)) continue; - PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa", + PHINode *PN = PHINode::Create(Inst->getType(), + PredCache.GetNumPreds(ExitBB), + Inst->getName()+".lcssa", ExitBB->begin()); - PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB)); // Add inputs from inside the loop for this PHI. for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { @@ -236,6 +239,8 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, &PN->getOperandUse( PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); } + + AddedPHIs.push_back(PN); // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); @@ -262,6 +267,12 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // Otherwise, do full PHI insertion. SSAUpdate.RewriteUse(*UsesToRewrite[i]); } + + // Remove PHI nodes that did not have any uses rewritten. + for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { + if (AddedPHIs[i]->use_empty()) + AddedPHIs[i]->eraseFromParent(); + } return true; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 3f789fa..4bca2fc 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -20,8 +20,11 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -65,8 +68,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { // Let the basic block know that we are letting go of it. Based on this, // it will adjust it's PHI nodes. - assert(BI->getParent() && "Terminator not inserted in block!"); - OldDest->removePredecessor(BI->getParent()); + OldDest->removePredecessor(BB); // Replace the conditional branch with an unconditional one. BranchInst::Create(Destination, BI); @@ -209,8 +211,18 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { bool llvm::isInstructionTriviallyDead(Instruction *I) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; - // We don't want debug info removed by anything this general. - if (isa<DbgInfoIntrinsic>(I)) return false; + // We don't want debug info removed by anything this general, unless + // debug info is empty. 
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) { + if (DDI->getAddress()) + return false; + return true; + } + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) { + if (DVI->getValue()) + return false; + return true; + } if (!I->mayHaveSideEffects()) return true; @@ -320,8 +332,14 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { BI = BB->begin(); continue; } - + + if (Inst->isTerminator()) + break; + + WeakVH BIHandle(BI); MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + if (BIHandle != BI) + BI = BB->begin(); } return MadeChange; } @@ -632,6 +650,8 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I)); Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7)); } + // Avoid colliding with the DenseMap sentinels ~0 and ~0-1. + Hash >>= 1; // If we've never seen this hash value before, it's a unique PHI. std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair = HashMap.insert(std::make_pair(Hash, PN)); @@ -753,3 +773,83 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, return Align; } +///===---------------------------------------------------------------------===// +/// Dbg Intrinsic utilities +/// + +/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. +bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + StoreInst *SI, DIBuilder &Builder) { + DIVariable DIVar(DDI->getVariable()); + if (!DIVar.Verify()) + return false; + + Instruction *DbgVal = + Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, + DIVar, SI); + + // Propagate any debug metadata from the store onto the dbg.value. + DebugLoc SIDL = SI->getDebugLoc(); + if (!SIDL.isUnknown()) + DbgVal->setDebugLoc(SIDL); + // Otherwise propagate debug metadata from dbg.declare. + else + DbgVal->setDebugLoc(DDI->getDebugLoc()); + return true; +} + +/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. +bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + LoadInst *LI, DIBuilder &Builder) { + DIVariable DIVar(DDI->getVariable()); + if (!DIVar.Verify()) + return false; + + Instruction *DbgVal = + Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, + DIVar, LI); + + // Propagate any debug metadata from the store onto the dbg.value. + DebugLoc LIDL = LI->getDebugLoc(); + if (!LIDL.isUnknown()) + DbgVal->setDebugLoc(LIDL); + // Otherwise propagate debug metadata from dbg.declare. + else + DbgVal->setDebugLoc(DDI->getDebugLoc()); + return true; +} + +/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set +/// of llvm.dbg.value intrinsics. 
+bool llvm::LowerDbgDeclare(Function &F) { + DIBuilder DIB(*F.getParent()); + SmallVector<DbgDeclareInst *, 4> Dbgs; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + Dbgs.push_back(DDI); + } + if (Dbgs.empty()) + return false; + + for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(), + E = Dbgs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) { + bool RemoveDDI = true; + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) + if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + else if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) + ConvertDebugDeclareToDebugValue(DDI, LI, DIB); + else + RemoveDDI = false; + if (RemoveDDI) + DDI->eraseFromParent(); + } + } + return true; +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 2462630..f02ffd2 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -115,7 +115,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", true, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::LoopSimplifyID = LoopSimplify::ID; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } @@ -648,9 +648,8 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // the backedge block which correspond to any PHI nodes in the header block. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); - PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be", - BETerminator); - NewPN->reserveOperandSpace(BackedgeBlocks.size()); + PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), + PN->getName()+".be", BETerminator); if (AA) AA->copyValue(PN, NewPN); // Loop over the PHI node, moving all entries except the one for the diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 914a439..ed733d3 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -84,7 +84,7 @@ char LowerSwitch::ID = 0; INITIALIZE_PASS(LowerSwitch, "lowerswitch", "Lower SwitchInst's to branches", false, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::LowerSwitchID = LowerSwitch::ID; // createLowerSwitchPass - Interface to this file... 
FunctionPass *llvm::createLowerSwitchPass() { diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 7788857..50c9ae2 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -45,7 +46,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" #include <algorithm> -#include <map> #include <queue> using namespace llvm; @@ -103,7 +103,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { /// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the /// alloca 'V', if any. static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) { - if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1)) + if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) for (Value::use_iterator UI = DebugNode->use_begin(), E = DebugNode->use_end(); UI != E; ++UI) if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) @@ -273,8 +273,6 @@ namespace { LargeBlockInfo &LBI); void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI); - void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI); - void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, @@ -391,7 +389,9 @@ void PromoteMem2Reg::run() { if (Info.UsingBlocks.empty()) { // Record debuginfo for the store and remove the declaration's debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore); + if (!DIB) + DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); DDI->eraseFromParent(); } // Remove the (now dead) store and alloca. @@ -423,8 +423,11 @@ void PromoteMem2Reg::run() { while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->use_back()); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = Info.DbgDeclare) - ConvertDebugDeclareToDebugValue(DDI, SI); + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } SI->eraseFromParent(); LBI.deleteValue(SI); } @@ -944,28 +947,6 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, } } -// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value -// that has an associated llvm.dbg.decl intrinsic. -void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, - StoreInst *SI) { - DIVariable DIVar(DDI->getVariable()); - if (!DIVar.Verify()) - return; - - if (!DIB) - DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); - Instruction *DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0, - DIVar, SI); - - // Propagate any debug metadata from the store onto the dbg.value. - DebugLoc SIDL = SI->getDebugLoc(); - if (!SIDL.isUnknown()) - DbgVal->setDebugLoc(SIDL); - // Otherwise propagate debug metadata from dbg.declare. 
- else - DbgVal->setDebugLoc(DDI->getDebugLoc()); -} - // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific // Alloca returns true if there wasn't already a phi-node for that variable // @@ -979,12 +960,11 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. - PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), Allocas[AllocaNo]->getName() + "." + Twine(Version++), BB->begin()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; - PN->reserveOperandSpace(getNumPreds(BB)); if (AST && PN->getType()->isPointerTy()) AST->copyValue(PointerAllocaValues[AllocaNo], PN); @@ -1076,8 +1056,11 @@ NextIteration: // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) - ConvertDebugDeclareToDebugValue(DDI, SI); + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } BB->getInstList().erase(SI); } } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 3896d98..2860c3e 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ssaupdater" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -20,8 +21,10 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" + using namespace llvm; typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; @@ -170,8 +173,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { } // Ok, we have no way out, insert a new one now. - PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front()); - InsertedPHI->reserveOperandSpace(PredValues.size()); + PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), + ProtoName, &BB->front()); // Fill in all the predecessors of the PHI. for (unsigned i = 0, e = PredValues.size(); i != e; ++i) @@ -184,6 +187,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return V; } + // Set DebugLoc. + InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB)); + // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); @@ -289,9 +295,8 @@ public: /// Reserve space for the operands but do not fill them in yet. 
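The mechanical change repeated across these files is summarized by the sketch below (illustrative, assuming the post-patch headers; the helper itself is hypothetical): PHINode::Create now takes the expected number of incoming values directly, replacing the separate reserveOperandSpace call that the removed lines show.

#include "llvm/Instructions.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;

// Allocate a PHI with room for NumPreds incoming values in one step.
// Previously this was:
//   PHINode *PN = PHINode::Create(Ty, Name, InsertPt);
//   PN->reserveOperandSpace(NumPreds);
static PHINode *createReservedPHI(const Type *Ty, unsigned NumPreds,
                                  const Twine &Name, Instruction *InsertPt) {
  return PHINode::Create(Ty, NumPreds, Name, InsertPt);
}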
static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, SSAUpdater *Updater) { - PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName, - &BB->front()); - PHI->reserveOperandSpace(NumPreds); + PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds, + Updater->ProtoName, &BB->front()); return PHI; } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index c670885..18b8573 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -37,6 +37,10 @@ #include <map> using namespace llvm; +static cl::opt<unsigned> +PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), + cl::desc("Control the amount of phi node folding to perform (default = 1)")); + static cl::opt<bool> DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); @@ -201,11 +205,20 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, /// which works well enough for us. /// /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to -/// see if V (which must be an instruction) is cheap to compute and is -/// non-trapping. If both are true, the instruction is inserted into the set -/// and true is returned. +/// see if V (which must be an instruction) and its recursive operands +/// that do not dominate BB have a combined cost lower than CostRemaining and +/// are non-trapping. If both are true, the instruction is inserted into the +/// set and true is returned. +/// +/// The cost for most non-trapping instructions is defined as 1 except for +/// Select whose cost is 2. +/// +/// After this function returns, CostRemaining is decreased by the cost of +/// V plus its non-dominating operands. If that cost is greater than +/// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, - SmallPtrSet<Instruction*, 4> *AggressiveInsts) { + SmallPtrSet<Instruction*, 4> *AggressiveInsts, + unsigned &CostRemaining) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -232,12 +245,17 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // instructions in the 'if region'. if (AggressiveInsts == 0) return false; + // If we have seen this instruction before, don't count it again. + if (AggressiveInsts->count(I)) return true; + // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. if (!I->isSafeToSpeculativelyExecute()) return false; + unsigned Cost = 0; + switch (I->getOpcode()) { default: return false; // Cannot hoist this out safely. case Instruction::Load: @@ -246,11 +264,13 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // predecessor. if (PBB->getFirstNonPHIOrDbg() != I) return false; + Cost = 1; break; case Instruction::GetElementPtr: // GEPs are cheap if all indices are constant. 
if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices()) return false; + Cost = 1; break; case Instruction::Add: case Instruction::Sub: @@ -261,13 +281,26 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, case Instruction::LShr: case Instruction::AShr: case Instruction::ICmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + Cost = 1; break; // These are all cheap and non-trapping instructions. + + case Instruction::Select: + Cost = 2; + break; } - // Okay, we can only really hoist these out if their operands are not - // defined in the conditional region. + if (Cost > CostRemaining) + return false; + + CostRemaining -= Cost; + + // Okay, we can only really hoist these out if their operands do + // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, 0)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -807,12 +840,16 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { BasicBlock::iterator BB2_Itr = BB2->begin(); Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; - while (isa<DbgInfoIntrinsic>(I1)) - I1 = BB1_Itr++; - while (isa<DbgInfoIntrinsic>(I2)) - I2 = BB2_Itr++; - if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || - !I1->isIdenticalToWhenDefined(I2) || + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = BB2_Itr++; + } + if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) || (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; @@ -835,13 +872,17 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { I2->eraseFromParent(); I1 = BB1_Itr++; - while (isa<DbgInfoIntrinsic>(I1)) - I1 = BB1_Itr++; I2 = BB2_Itr++; - while (isa<DbgInfoIntrinsic>(I2)) - I2 = BB2_Itr++; - } while (I1->getOpcode() == I2->getOpcode() && - I1->isIdenticalToWhenDefined(I2)); + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = BB2_Itr++; + } + } while (I1->isIdenticalToWhenDefined(I2)); return true; @@ -1209,6 +1250,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. 
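For context, a source-level sketch (not from the patch) of the two-entry PHI this loop examines: each arm of the diamond computes a cheap, non-trapping value, and under the new model each arm may spend up to the phi-node-folding-threshold cost (1 by default, with selects counted as 2) before the fold is rejected.

// Both arms are single cost-1 instructions, so with the default threshold the
// PHI for 'result' can be folded and the branch replaced by a select:
//   result = cond ? a + 1 : b - 1
int pick(int a, int b, bool cond) {
  int result;
  if (cond)
    result = a + 1;
  else
    result = b - 1;
  return result;
}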
   SmallPtrSet<Instruction*, 4> AggressiveInsts;
+  unsigned MaxCostVal0 = PHINodeFoldingThreshold,
+           MaxCostVal1 = PHINodeFoldingThreshold;
 
   for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
     PHINode *PN = cast<PHINode>(II++);
@@ -1218,8 +1261,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
       continue;
     }
 
-    if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts) ||
-        !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts))
+    if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+                             MaxCostVal0) ||
+        !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+                             MaxCostVal1))
       return false;
   }
 
@@ -1393,24 +1438,23 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
   return true;
 }
 
-/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
-/// and if a predecessor branches to us and one of our successors, fold the
-/// setcc into the predecessor and use logical operations to pick the right
-/// destination.
+/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
+/// predecessor branches to us and one of our successors, fold the block into
+/// the predecessor and use logical operations to pick the right destination.
 bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
   BasicBlock *BB = BI->getParent();
   Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
   if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
       Cond->getParent() != BB || !Cond->hasOneUse())
     return false;
-  
+
   // Only allow this if the condition is a simple instruction that can be
   // executed unconditionally.  It must be in the same block as the branch, and
   // must be at the front of the block.
   BasicBlock::iterator FrontIt = BB->front();
+
   // Ignore dbg intrinsics.
-  while (isa<DbgInfoIntrinsic>(FrontIt))
-    ++FrontIt;
+  while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
 
   // Allow a single instruction to be hoisted in addition to the compare
   // that feeds the branch.  We later ensure that any values that _it_ uses
@@ -1422,21 +1466,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
       FrontIt->isSafeToSpeculativelyExecute()) {
     BonusInst = &*FrontIt;
     ++FrontIt;
+
+    // Ignore dbg intrinsics.
+    while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
   }
-  
+
   // Only a single bonus inst is allowed.
   if (&*FrontIt != Cond)
     return false;
 
   // Make sure the instruction after the condition is the cond branch.
   BasicBlock::iterator CondIt = Cond; ++CondIt;
+
   // Ingore dbg intrinsics.
-  while(isa<DbgInfoIntrinsic>(CondIt))
-    ++CondIt;
-  if (&*CondIt != BI) {
-    assert (!isa<DbgInfoIntrinsic>(CondIt) && "Hey do not forget debug info!");
+  while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
+
+  if (&*CondIt != BI)
     return false;
-  }
 
   // Cond is known to be a compare or binary operator.  Check to make sure that
   // neither operand is a potentially-trapping constant expression.
@@ -1447,13 +1493,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
     if (CE->canTrap())
       return false;
 
-  // Finally, don't infinitely unroll conditional loops.
   BasicBlock *TrueDest  = BI->getSuccessor(0);
   BasicBlock *FalseDest = BI->getSuccessor(1);
   if (TrueDest == BB || FalseDest == BB)
     return false;
-  
+
   for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
     BasicBlock *PredBlock = *PI;
     BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
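
The folding needs to know how the predecessor's conditional branch relates to
this block's: if both reach the same destination when true, the two conditions
combine with an 'or'; if both fall through to the same false destination they
combine with an 'and'; and if the destinations line up crosswise, the
predecessor's condition is inverted first. A standalone sketch of the matching
performed in the next hunk (the Combine enum and parameter names are inventions
for the example, not LLVM types):

    #include <utility>

    enum class Combine { Or, And, None };

    // Given the predecessor's true/false successors and the folded block's
    // true/false destinations, pick the logical operator that merges the two
    // branch conditions and report whether the predecessor's condition must
    // be inverted first.  Blocks are identified by opaque pointers here.
    static std::pair<Combine, bool>
    pickCombination(const void *PredTrue, const void *PredFalse,
                    const void *TrueDest, const void *FalseDest) {
      if (PredTrue == TrueDest)   return {Combine::Or, false};
      if (PredFalse == FalseDest) return {Combine::And, false};
      if (PredTrue == FalseDest)  return {Combine::And, true};
      if (PredFalse == TrueDest)  return {Combine::Or, true};
      return {Combine::None, false};  // no shared destination; skip this pred
    }

Doing this test early (the hunk below also moves it up from its old position
later in the loop) lets predecessors that share no destination be skipped
before the more expensive bonus-instruction screening runs.
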
@@ -1461,10 +1506,24 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
     // Check that we have two conditional branches.  If there is a PHI node in
     // the common successor, verify that the same value flows in from both
     // blocks.
-    if (PBI == 0 || PBI->isUnconditional() ||
-        !SafeToMergeTerminators(BI, PBI))
+    if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
       continue;
 
+    // Determine if the two branches share a common destination.
+    Instruction::BinaryOps Opc;
+    bool InvertPredCond = false;
+
+    if (PBI->getSuccessor(0) == TrueDest)
+      Opc = Instruction::Or;
+    else if (PBI->getSuccessor(1) == FalseDest)
+      Opc = Instruction::And;
+    else if (PBI->getSuccessor(0) == FalseDest)
+      Opc = Instruction::And, InvertPredCond = true;
+    else if (PBI->getSuccessor(1) == TrueDest)
+      Opc = Instruction::Or, InvertPredCond = true;
+    else
+      continue;
+
     // Ensure that any values used in the bonus instruction are also used
     // by the terminator of the predecessor.  This means that those values
     // must already have been resolved, so we won't be inhibiting the
@@ -1502,20 +1561,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
       if (!UsedValues.empty())
         return false;
     }
-
-    Instruction::BinaryOps Opc;
-    bool InvertPredCond = false;
-
-    if (PBI->getSuccessor(0) == TrueDest)
-      Opc = Instruction::Or;
-    else if (PBI->getSuccessor(1) == FalseDest)
-      Opc = Instruction::And;
-    else if (PBI->getSuccessor(0) == FalseDest)
-      Opc = Instruction::And, InvertPredCond = true;
-    else if (PBI->getSuccessor(1) == TrueDest)
-      Opc = Instruction::Or, InvertPredCond = true;
-    else
-      continue;
 
     DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
 
@@ -1566,6 +1611,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
       AddPredecessorToBlock(FalseDest, PredBlock, BB);
       PBI->setSuccessor(1, FalseDest);
     }
+
+    // Copy any debug value intrinsics into the end of PredBlock.
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      if (isa<DbgInfoIntrinsic>(*I))
+        I->clone()->insertBefore(PBI);
+
     return true;
   }
   return false;
@@ -1598,13 +1649,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   // in the constant and simplify the block result.  Subsequent passes of
   // simplifycfg will thread the block.
   if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+    pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
     PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
+                                     std::distance(PB, PE),
                                      BI->getCondition()->getName() + ".pr",
                                      BB->begin());
     // Okay, we're going to insert the PHI node.  Since PBI is not the only
     // predecessor, compute the PHI'd conditional value for all of the preds.
     // Any predecessor where the condition is not computable we keep symbolic.
-    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    for (pred_iterator PI = PB; PI != PE; ++PI) {
       BasicBlock *P = *PI;
       if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
           PBI != BI && PBI->isConditional() &&
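
The next hunk adds SimplifySwitchOnSelect: when a switch's operand is a select
between two constants, the switch can only ever reach the two case destinations
those constants name, so it is really just a two-way branch on the select's
condition. A source-level analogy (the constants and return values are made up
for illustration):

    // Before simplification: a select feeding a switch.
    int dispatch(bool cond) {
      switch (cond ? 1 : 2) {
      case 1:  return 10;   // reached exactly when cond is true
      case 2:  return 20;   // reached exactly when cond is false
      default: return 0;    // dead once the switch is folded away
      }
    }

    // After the rewrite, the function behaves like:
    //   int dispatch(bool cond) { return cond ? 10 : 20; }

If both constants happen to reach the same case destination, the helper's
SimplifyTerminatorOnSelect call degenerates to an unconditional branch, as the
comment on the new function notes.
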
@@ -1800,6 +1853,26 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
   return true;
 }
 
+// SimplifySwitchOnSelect - Replaces
+//   (switch (select cond, X, Y)) on constant X, Y
+// with a branch - conditional if X and Y lead to distinct BBs,
+// unconditional otherwise.
+static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
+  // Check for constant integer values in the select.
+  ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
+  ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
+  if (!TrueVal || !FalseVal)
+    return false;
+
+  // Find the relevant condition and destinations.
+  Value *Condition = Select->getCondition();
+  BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal));
+  BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal));
+
+  // Perform the actual simplification.
+  return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
+}
+
 // SimplifyIndirectBrOnSelect - Replaces
 //   (indirectbr (select cond, blockaddress(@fn, BlockA),
 //                             blockaddress(@fn, BlockB)))
@@ -2148,7 +2221,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
       if (LI->isVolatile())
         break;
 
-    // Delete this instruction
+    // Delete this instruction (any uses are guaranteed to be dead)
+    if (!BBI->use_empty())
+      BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
     BBI->eraseFromParent();
     Changed = true;
   }
@@ -2189,17 +2264,28 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
     // If the default value is unreachable, figure out the most popular
    // destination and make it the default.
     if (SI->getSuccessor(0) == BB) {
-      std::map<BasicBlock*, unsigned> Popularity;
-      for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
-        Popularity[SI->getSuccessor(i)]++;
-      
+      std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
+      for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
+        std::pair<unsigned, unsigned>& entry =
+            Popularity[SI->getSuccessor(i)];
+        if (entry.first == 0) {
+          entry.first = 1;
+          entry.second = i;
+        } else {
+          entry.first++;
+        }
+      }
+
       // Find the most popular block.
       unsigned MaxPop = 0;
+      unsigned MaxIndex = 0;
       BasicBlock *MaxBlock = 0;
-      for (std::map<BasicBlock*, unsigned>::iterator
+      for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
            I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
-        if (I->second > MaxPop) {
-          MaxPop = I->second;
+        if (I->second.first > MaxPop ||
+            (I->second.first == MaxPop && MaxIndex > I->second.second)) {
+          MaxPop = I->second.first;
+          MaxIndex = I->second.second;
           MaxBlock = I->first;
         }
       }
@@ -2309,7 +2395,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) {
   if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
     if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
       return SimplifyCFG(BB) | true;
-  
+
+  Value *Cond = SI->getCondition();
+  if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+    if (SimplifySwitchOnSelect(SI, Select))
+      return SimplifyCFG(BB) | true;
+
   // If the block only contains the switch, see if we can fold the block
   // away into any preds.
   BasicBlock::iterator BBI = BB->begin();
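
Two behavioral notes on the SimplifyCFG.cpp changes above are easy to miss.
First, instructions deleted ahead of an unreachable may still have (dead) uses,
so they are now RAUW'd to undef before erasure rather than erased outright.
Second, when the default destination of a switch is unreachable, the most
popular case destination becomes the new default, and ties are now broken by
the first case index instead of by whatever order the pointer-keyed map happens
to iterate in. A standalone sketch of that selection, with blocks reduced to
plain ints (the function name and types are illustrative only):

    #include <map>
    #include <utility>
    #include <vector>

    // Pick the most frequent value; on a tie, prefer the one seen first.
    static int mostPopular(const std::vector<int> &Succs) {
      std::map<int, std::pair<unsigned, unsigned> > Popularity; // count, first index
      for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
        std::pair<unsigned, unsigned> &Entry = Popularity[Succs[i]];
        if (Entry.first == 0)
          Entry.second = i;              // remember the first occurrence
        ++Entry.first;
      }

      int Best = -1;
      unsigned MaxPop = 0, MaxIndex = 0;
      for (std::map<int, std::pair<unsigned, unsigned> >::const_iterator
             I = Popularity.begin(), E = Popularity.end(); I != E; ++I)
        if (I->second.first > MaxPop ||
            (I->second.first == MaxPop && I->second.second < MaxIndex)) {
          MaxPop = I->second.first;
          MaxIndex = I->second.second;
          Best = I->first;
        }
      return Best;                       // -1 when Succs is empty
    }
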
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index ccb8287..46d4ada 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -116,7 +116,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
     ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
   } else {
     // If the function doesn't return void... add a PHI node to the block...
-    PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");
+    PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+                         "UnifiedRetVal");
     NewRetBlock->getInstList().push_back(PN);
     ReturnInst::Create(F.getContext(), PN, NewRetBlock);
   }
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index f5481d3..a73bf04 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -39,7 +39,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
     return VM[V] = const_cast<Value*>(V);
 
   // Create a dummy node in case we have a metadata cycle.
-  MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+  MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
   VM[V] = Dummy;
 
   // Check all operands to see if any need to be remapped.
@@ -54,7 +54,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
     Value *Op = MD->getOperand(i);
     Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
   }
-  MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+  MDNode *NewMD = MDNode::get(V->getContext(), Elts);
   Dummy->replaceAllUsesWith(NewMD);
   VM[V] = NewMD;
   MDNode::deleteTemporary(Dummy);
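
The ValueMapper hunk is mostly an ArrayRef API migration, but the surrounding
logic deserves a note: a metadata node's operands may point back (indirectly)
at the node itself, so MapValue parks a temporary node in the map before
visiting the operands and only afterwards builds the real node and redirects
uses of the placeholder to it. The sketch below shows the same cycle-breaking
idea on a mutable stand-in Node type, where the placeholder can simply be
filled in instead of replaced; none of these names are LLVM API:

    #include <map>
    #include <vector>

    // Stand-in for a node whose operand graph may contain cycles.
    struct Node {
      std::vector<Node *> Operands;
    };

    // Clone a possibly-cyclic graph.  The clone is registered in the map
    // before its operands are visited, so a cycle resolves to the partially
    // built clone instead of recursing forever.
    static Node *cloneGraph(Node *N, std::map<Node *, Node *> &Map) {
      std::map<Node *, Node *>::iterator It = Map.find(N);
      if (It != Map.end())
        return It->second;               // already cloned (or in progress)

      Node *Clone = new Node();
      Map[N] = Clone;                    // placeholder breaks the cycle
      for (unsigned i = 0, e = N->Operands.size(); i != e; ++i) {
        Node *Op = N->Operands[i];
        Clone->Operands.push_back(Op ? cloneGraph(Op, Map) : 0);
      }
      return Clone;
    }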