Diffstat (limited to 'contrib/llvm/lib/Transforms')
30 files changed, 928 insertions, 762 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp index 816291d..6df0447 100644 --- a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -22,7 +22,7 @@ static cl::list<std::string> ForceAttributes("force-attribute", cl::Hidden, cl::desc("Add an attribute to a function. This should be a " "pair of 'function-name:attribute-name', for " - "example -force-add-attribute=foo:noinline. This " + "example -force-attribute=foo:noinline. This " "option can be specified multiple times.")); static Attribute::AttrKind parseAttrKind(StringRef Kind) { diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 6dcfb3f..527fdd1 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -6,16 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements a simple interprocedural pass which walks the -// call-graph, looking for functions which do not access or only read -// non-local memory, and marking them readnone/readonly. It does the -// same with function arguments independently, marking them readonly/ -// readnone/nocapture. Finally, well-known library call declarations -// are marked with all attributes that are consistent with the -// function's standard definition. This pass is implemented as a -// bottom-up traversal of the call-graph. -// +/// +/// \file +/// This file implements interprocedural passes which walk the +/// call-graph deducing and/or propagating function attributes. +/// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO.h" @@ -57,19 +52,14 @@ typedef SmallSetVector<Function *, 8> SCCNodeSet; } namespace { -struct FunctionAttrs : public CallGraphSCCPass { +struct PostOrderFunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID) { - initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); + PostOrderFunctionAttrs() : CallGraphSCCPass(ID) { + initializePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); } bool runOnSCC(CallGraphSCC &SCC) override; - bool doInitialization(CallGraph &CG) override { - Revisit.clear(); - return false; - } - bool doFinalization(CallGraph &CG) override; - + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AssumptionCacheTracker>(); @@ -79,20 +69,19 @@ struct FunctionAttrs : public CallGraphSCCPass { private: TargetLibraryInfo *TLI; - SmallVector<WeakVH,16> Revisit; }; } -char FunctionAttrs::ID = 0; -INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", +char PostOrderFunctionAttrs::ID = 0; +INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrs, "functionattrs", "Deduce function attributes", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", +INITIALIZE_PASS_END(PostOrderFunctionAttrs, "functionattrs", "Deduce function attributes", false, false) -Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } +Pass *llvm::createPostOrderFunctionAttrsPass() { return new PostOrderFunctionAttrs(); } namespace { /// The three 
kinds of memory access relevant to 'readonly' and @@ -949,8 +938,7 @@ static bool setDoesNotRecurse(Function &F) { return true; } -static bool addNoRecurseAttrs(const CallGraphSCC &SCC, - SmallVectorImpl<WeakVH> &Revisit) { +static bool addNoRecurseAttrs(const CallGraphSCC &SCC) { // Try and identify functions that do not recurse. // If the SCC contains multiple nodes we know for sure there is recursion. @@ -973,32 +961,11 @@ static bool addNoRecurseAttrs(const CallGraphSCC &SCC, // Function calls a potentially recursive function. return setDoesNotRecurse(*F); - // We know that F is not obviously recursive, but we haven't been able to - // prove that it doesn't actually recurse. Add it to the Revisit list to try - // again top-down later. - Revisit.push_back(F); + // Nothing else we can deduce usefully during the postorder traversal. return false; } -static bool addNoRecurseAttrsTopDownOnly(Function *F) { - // If F is internal and all uses are in norecurse functions, then F is also - // norecurse. - if (F->doesNotRecurse()) - return false; - if (F->hasInternalLinkage()) { - for (auto *U : F->users()) - if (auto *I = dyn_cast<Instruction>(U)) { - if (!I->getParent()->getParent()->doesNotRecurse()) - return false; - } else { - return false; - } - return setDoesNotRecurse(*F); - } - return false; -} - -bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { +bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) { TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); bool Changed = false; @@ -1040,19 +1007,100 @@ bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { Changed |= addNoAliasAttrs(SCCNodes); Changed |= addNonNullAttrs(SCCNodes, *TLI); } - - Changed |= addNoRecurseAttrs(SCC, Revisit); + + Changed |= addNoRecurseAttrs(SCC); return Changed; } -bool FunctionAttrs::doFinalization(CallGraph &CG) { +namespace { +/// A pass to do RPO deduction and propagation of function attributes. +/// +/// This pass provides a general RPO or "top down" propagation of +/// function attributes. For a few (rare) cases, we can deduce significantly +/// more about function attributes by working in RPO, so this pass +/// provides the compliment to the post-order pass above where the majority of +/// deduction is performed. +// FIXME: Currently there is no RPO CGSCC pass structure to slide into and so +// this is a boring module pass, but eventually it should be an RPO CGSCC pass +// when such infrastructure is available. +struct ReversePostOrderFunctionAttrs : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ReversePostOrderFunctionAttrs() : ModulePass(ID) { + initializeReversePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<CallGraphWrapperPass>(); + } +}; +} + +char ReversePostOrderFunctionAttrs::ID = 0; +INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrs, "rpo-functionattrs", + "Deduce function attributes in RPO", false, false) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(ReversePostOrderFunctionAttrs, "rpo-functionattrs", + "Deduce function attributes in RPO", false, false) + +Pass *llvm::createReversePostOrderFunctionAttrsPass() { + return new ReversePostOrderFunctionAttrs(); +} + +static bool addNoRecurseAttrsTopDown(Function &F) { + // We check the preconditions for the function prior to calling this to avoid + // the cost of building up a reversible post-order list. 
We assert them here + // to make sure none of the invariants this relies on were violated. + assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); + assert(!F.doesNotRecurse() && + "This function has already been deduced as norecurs!"); + assert(F.hasInternalLinkage() && + "Can only do top-down deduction for internal linkage functions!"); + + // If F is internal and all of its uses are calls from a non-recursive + // functions, then none of its calls could in fact recurse without going + // through a function marked norecurse, and so we can mark this function too + // as norecurse. Note that the uses must actually be calls -- otherwise + // a pointer to this function could be returned from a norecurse function but + // this function could be recursively (indirectly) called. Note that this + // also detects if F is directly recursive as F is not yet marked as + // a norecurse function. + for (auto *U : F.users()) { + auto *I = dyn_cast<Instruction>(U); + if (!I) + return false; + CallSite CS(I); + if (!CS || !CS.getParent()->getParent()->doesNotRecurse()) + return false; + } + return setDoesNotRecurse(F); +} + +bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) { + // We only have a post-order SCC traversal (because SCCs are inherently + // discovered in post-order), so we accumulate them in a vector and then walk + // it in reverse. This is simpler than using the RPO iterator infrastructure + // because we need to combine SCC detection and the PO walk of the call + // graph. We can also cheat egregiously because we're primarily interested in + // synthesizing norecurse and so we can only save the singular SCCs as SCCs + // with multiple functions in them will clearly be recursive. + auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + SmallVector<Function *, 16> Worklist; + for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { + if (I->size() != 1) + continue; + + Function *F = I->front()->getFunction(); + if (F && !F->isDeclaration() && !F->doesNotRecurse() && + F->hasInternalLinkage()) + Worklist.push_back(F); + } + bool Changed = false; - // When iterating over SCCs we visit functions in a bottom-up fashion. Some of - // the rules we have for identifying norecurse functions work best with a - // top-down walk, so look again at all the functions we previously marked as - // worth revisiting, in top-down order. - for (auto &F : reverse(Revisit)) - if (F) - Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F)); + for (auto *F : reverse(Worklist)) + Changed |= addNoRecurseAttrsTopDown(*F); + return Changed; } diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp index d8b677b..5e0df95 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -41,15 +41,16 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName, LLVMContext &Context) { SMDiagnostic Err; DEBUG(dbgs() << "Loading '" << FileName << "'\n"); - std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context); + // Metadata isn't loaded or linked until after all functions are + // imported, after which it will be materialized and linked. 
+ std::unique_ptr<Module> Result = + getLazyIRFileModule(FileName, Err, Context, + /* ShouldLazyLoadMetadata = */ true); if (!Result) { Err.print("function-import", errs()); return nullptr; } - Result->materializeMetadata(); - UpgradeDebugInfo(*Result); - return Result; } @@ -132,6 +133,8 @@ static void findExternalCalls(const Module &DestModule, Function &F, // Ignore functions already present in the destination module auto *SrcGV = DestModule.getNamedValue(ImportedName); if (SrcGV) { + if (GlobalAlias *SGA = dyn_cast<GlobalAlias>(SrcGV)) + SrcGV = SGA->getBaseObject(); assert(isa<Function>(SrcGV) && "Name collision during import"); if (!cast<Function>(SrcGV)->isDeclaration()) { DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring " @@ -324,6 +327,10 @@ bool FunctionImporter::importFunctions(Module &DestModule) { ModuleToTempMDValsMap) { // Load the specified source module. auto &SrcModule = ModuleLoaderCache(SME.getKey()); + // The modules were created with lazy metadata loading. Materialize it + // now, before linking it. + SrcModule.materializeMetadata(); + UpgradeDebugInfo(SrcModule); // Link in all necessary metadata from this module. if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get())) @@ -408,14 +415,19 @@ public: Index = IndexPtr.get(); } + // First we need to promote to global scope and rename any local values that + // are potentially exported to other modules. + if (renameModuleForThinLTO(M, Index)) { + errs() << "Error renaming module\n"; + return false; + } + // Perform the import now. auto ModuleLoader = [&M](StringRef Identifier) { return loadFile(Identifier, M.getContext()); }; FunctionImporter Importer(*Index, ModuleLoader); return Importer.importFunctions(M); - - return false; } }; } // anonymous namespace diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp index 7ea6c08..89629cf0 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp @@ -28,7 +28,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeDAEPass(Registry); initializeDAHPass(Registry); initializeForceFunctionAttrsLegacyPassPass(Registry); - initializeFunctionAttrsPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); initializeIPCPPass(Registry); @@ -42,6 +41,8 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeLowerBitSetsPass(Registry); initializeMergeFunctionsPass(Registry); initializePartialInlinerPass(Registry); + initializePostOrderFunctionAttrsPass(Registry); + initializeReversePostOrderFunctionAttrsPass(Registry); initializePruneEHPass(Registry); initializeStripDeadPrototypesLegacyPassPass(Registry); initializeStripSymbolsPass(Registry); @@ -71,7 +72,7 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) { } void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createFunctionAttrsPass()); + unwrap(PM)->add(createPostOrderFunctionAttrsPass()); } void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp index 8e4ad64..3c6a7bb 100644 --- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -38,7 +38,7 @@ namespace { static char ID; // Pass identification, replacement for typeid unsigned NumLoops; - explicit LoopExtractor(unsigned numLoops = ~0) + explicit LoopExtractor(unsigned numLoops = ~0) : LoopPass(ID), NumLoops(numLoops) { 
initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); } @@ -143,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. - LI.updateUnloop(L); + LI.markAsRemoved(L); } ++NumExtracted; } diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 9876efa..faada9c 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -251,7 +251,7 @@ void PassManagerBuilder::populateModulePassManager( Inliner = nullptr; } if (!DisableUnitAtATime) - MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs + MPM.add(createPostOrderFunctionAttrsPass()); if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args @@ -346,6 +346,9 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (!DisableUnitAtATime) + MPM.add(createReversePostOrderFunctionAttrsPass()); + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve @@ -502,7 +505,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createIPSCCPPass()); // Now that we internalized some globals, see if we can hack on them! - PM.add(createFunctionAttrsPass()); // Add norecurse if possible. + PM.add(createPostOrderFunctionAttrsPass()); + PM.add(createReversePostOrderFunctionAttrsPass()); PM.add(createGlobalOptimizerPass()); // Promote any localized global vars. PM.add(createPromoteMemoryToRegisterPass()); @@ -551,7 +555,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createScalarReplAggregatesPass()); // Run a few AA driven optimizations here and now, to cleanup the code. - PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createPostOrderFunctionAttrsPass()); // Add nocapture. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. 
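The hunks above split the old FunctionAttrs CGSCC pass in two: PostOrderFunctionAttrs keeps the bottom-up SCC deduction, while the new ReversePostOrderFunctionAttrs module pass replaces the doFinalization()/Revisit mechanism and marks internal functions norecurse in a top-down walk. Below is a minimal sketch, not part of the patch, of how a legacy-pass-manager client could schedule the two passes in the same order PassManagerBuilder now does; the create* entry points and headers are the ones used in this revision of the tree, and the standalone helper name is invented for illustration.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"

// Hypothetical standalone pipeline mirroring the PassManagerBuilder change:
// bottom-up deduction first, then top-down norecurse propagation.
static void runFunctionAttrsPipeline(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  // Post-order (bottom-up) walk of call-graph SCCs: readnone/readonly,
  // argument attributes, and norecurse for SCCs that are obviously
  // non-recursive.
  PM.add(llvm::createPostOrderFunctionAttrsPass());
  // Reverse post-order (top-down) module pass: marks an internal function
  // norecurse when every use is a call from a function already known to
  // be norecurse.
  PM.add(llvm::createReversePostOrderFunctionAttrsPass());
  PM.run(M);
}

The split removes the need for the old pass's SmallVector<WeakVH> Revisit list and doFinalization() hook: the top-down work now runs as its own pass over singleton SCCs collected from the call graph.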
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 7ad0efc..160792b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -636,7 +636,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { // if pattern detected emit alternate sequence if (OpX && OpY) { BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->SetFastMathFlags(Log2->getFastMathFlags()); + Builder->setFastMathFlags(Log2->getFastMathFlags()); Log2->setArgOperand(0, OpY); Value *FMulVal = Builder->CreateFMul(OpX, Log2); Value *FSub = Builder->CreateFSub(FMulVal, OpX); @@ -652,7 +652,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool IgnoreZeroSign = I.hasNoSignedZeros(); if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->SetFastMathFlags(I.getFastMathFlags()); + Builder->setFastMathFlags(I.getFastMathFlags()); Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); @@ -693,7 +693,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Y) { BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->SetFastMathFlags(I.getFastMathFlags()); + Builder->setFastMathFlags(I.getFastMathFlags()); Value *T = Builder->CreateFMul(Opnd1, Opnd1); Value *R = Builder->CreateFMul(T, Y); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 776704d..51219bc 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -930,7 +930,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->SetFastMathFlags(FCI->getFastMathFlags()); + Builder->setFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal, FCI->getName() + ".inv"); @@ -973,7 +973,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->SetFastMathFlags(FCI->getFastMathFlags()); + Builder->setFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal, FCI->getName() + ".inv"); @@ -1082,7 +1082,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } else { IRBuilder<>::FastMathFlagGuard FMFG(*Builder); auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags(); - Builder->SetFastMathFlags(FMF); + Builder->setFastMathFlags(FMF); Cmp = Builder->CreateFCmp(Pred, LHS, RHS); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 54a9fbd..5cde31a 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -384,23 +384,20 @@ static void replaceExtractElements(InsertElementInst *InsElt, ConstantVector::get(ExtendMask)); // Insert the new shuffle after the vector operand of the extract is defined - // or at the 
start of the basic block, so any subsequent extracts can use it. - bool ReplaceAllExtUsers; - if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) { + // (as long as it's not a PHI) or at the start of the basic block of the + // extract, so any subsequent extracts in the same basic block can use it. + // TODO: Insert before the earliest ExtractElementInst that is replaced. + auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp); + if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) WideVec->insertAfter(ExtVecOpInst); - ReplaceAllExtUsers = true; - } else { - // TODO: Insert at start of function, so it's always safe to replace all? + else IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); - ReplaceAllExtUsers = false; - } // Replace extracts from the original narrow vector with extracts from the new // wide vector. for (User *U : ExtVecOp->users()) { ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); - if (!OldExt || - (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent())) + if (!OldExt || OldExt->getParent() != WideVec->getParent()) continue; auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); NewExt->insertAfter(WideVec); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 51ff95d..28483e7 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -93,8 +93,8 @@ private: /// Replace instrprof_increment with an increment of the appropriate value. void lowerIncrement(InstrProfIncrementInst *Inc); - /// Set up the section and uses for coverage data and its references. - void lowerCoverageData(GlobalVariable *CoverageData); + /// Force emitting of name vars for unused functions. + void lowerCoverageData(GlobalVariable *CoverageNamesVar); /// Get the region counters for an increment, creating them if necessary. /// @@ -156,9 +156,9 @@ bool InstrProfiling::runOnModule(Module &M) { } } - if (GlobalVariable *Coverage = - M.getNamedGlobal(getCoverageMappingVarName())) { - lowerCoverageData(Coverage); + if (GlobalVariable *CoverageNamesVar = + M.getNamedGlobal(getCoverageNamesVarName())) { + lowerCoverageData(CoverageNamesVar); MadeChange = true; } @@ -233,28 +233,16 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { Inc->eraseFromParent(); } -void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { - - Constant *Init = CoverageData->getInitializer(); - // We're expecting { [4 x 32], [n x { i8*, i32, i32 }], [m x i8] } - // for some C. If not, the frontend's given us something broken. 
- assert(Init->getNumOperands() == 3 && "bad number of fields in coverage map"); - assert(isa<ConstantArray>(Init->getAggregateElement(1)) && - "invalid function list in coverage map"); - ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(1)); - for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) { - Constant *Record = Records->getOperand(I); - Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts(); +void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) { + ConstantArray *Names = + cast<ConstantArray>(CoverageNamesVar->getInitializer()); + for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) { + Constant *NC = Names->getOperand(I); + Value *V = NC->stripPointerCasts(); assert(isa<GlobalVariable>(V) && "Missing reference to function name"); GlobalVariable *Name = cast<GlobalVariable>(V); - // If we have region counters for this name, we've already handled it. - auto It = ProfileDataMap.find(Name); - if (It != ProfileDataMap.end()) - if (It->second.RegionCounters) - continue; - // Move the name variable to the right section. Name->setSection(getNameSection()); Name->setAlignment(1); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 5a7bce5..34aaa7f 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -692,7 +692,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { const DataLayout &DL = F.getParent()->getDataLayout(); unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment); unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); - if (isa<StructType>(Shadow->getType())) { + if (Shadow->getType()->isAggregateType()) { paintOrigin(IRB, updateOrigin(Origin, IRB), getOriginPtr(Addr, IRB, Alignment), StoreSize, OriginAlignment); diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 087ce8a..dcdcfed 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -100,9 +100,9 @@ namespace { std::unique_ptr<BranchProbabilityInfo> BPI; bool HasProfileData; #ifdef NDEBUG - SmallPtrSet<BasicBlock*, 16> LoopHeaders; + SmallPtrSet<const BasicBlock *, 16> LoopHeaders; #else - SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders; + SmallSet<AssertingVH<const BasicBlock>, 16> LoopHeaders; #endif DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet; @@ -163,6 +163,7 @@ namespace { bool SimplifyPartiallyRedundantLoad(LoadInst *LI); bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB); + bool TryToUnfoldSelectInCurrBB(BasicBlock *BB); private: BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, @@ -210,11 +211,12 @@ bool JumpThreading::runOnFunction(Function &F) { // we will loop forever. We take care of this issue by not jump threading for // back edges. This works for normal cases but not for unreachable blocks as // they may have cycle with no back edge. 
- removeUnreachableBlocks(F); + bool EverChanged = false; + EverChanged |= removeUnreachableBlocks(F, LVI); FindLoopHeaders(F); - bool Changed, EverChanged = false; + bool Changed; do { Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E;) { @@ -363,8 +365,8 @@ void JumpThreading::FindLoopHeaders(Function &F) { SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; FindFunctionBackedges(F, Edges); - for (unsigned i = 0, e = Edges.size(); i != e; ++i) - LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); + for (const auto &Edge : Edges) + LoopHeaders.insert(Edge.second); } /// getKnownConstant - Helper method to determine if we can thread over a @@ -410,8 +412,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // If V is a constant, then it is known in all predecessors. if (Constant *KC = getKnownConstant(V, Preference)) { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Result.push_back(std::make_pair(KC, *PI)); + for (BasicBlock *Pred : predecessors(BB)) + Result.push_back(std::make_pair(KC, Pred)); return true; } @@ -434,8 +436,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. // Perhaps getConstantOnEdge should be smart enough to do this? - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI); @@ -491,22 +492,17 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // Scan for the sentinel. If we find an undef, force it to the // interesting value: x|undef -> true and x&undef -> false. - for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) - if (LHSVals[i].first == InterestingVal || - isa<UndefValue>(LHSVals[i].first)) { - Result.push_back(LHSVals[i]); - Result.back().first = InterestingVal; - LHSKnownBBs.insert(LHSVals[i].second); + for (const auto &LHSVal : LHSVals) + if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) { + Result.emplace_back(InterestingVal, LHSVal.second); + LHSKnownBBs.insert(LHSVal.second); } - for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) - if (RHSVals[i].first == InterestingVal || - isa<UndefValue>(RHSVals[i].first)) { + for (const auto &RHSVal : RHSVals) + if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) { // If we already inferred a value for this block on the LHS, don't // re-add it. - if (!LHSKnownBBs.count(RHSVals[i].second)) { - Result.push_back(RHSVals[i]); - Result.back().first = InterestingVal; - } + if (!LHSKnownBBs.count(RHSVal.second)) + Result.emplace_back(InterestingVal, RHSVal.second); } return !Result.empty(); @@ -522,8 +518,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, return false; // Invert the known values. - for (unsigned i = 0, e = Result.size(); i != e; ++i) - Result[i].first = ConstantExpr::getNot(Result[i].first); + for (auto &R : Result) + R.first = ConstantExpr::getNot(R.first); return true; } @@ -538,12 +534,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, WantInteger, CxtI); // Try to use constant folding to simplify the binary operator. 
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { - Constant *V = LHSVals[i].first; + for (const auto &LHSVal : LHSVals) { + Constant *V = LHSVal.first; Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); if (Constant *KC = getKnownConstant(Folded, WantInteger)) - Result.push_back(std::make_pair(KC, LHSVals[i].second)); + Result.push_back(std::make_pair(KC, LHSVal.second)); } } @@ -591,8 +587,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) { Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){ - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. LazyValueInfo::Tristate Res = @@ -615,12 +610,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, WantInteger, CxtI); - for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { - Constant *V = LHSVals[i].first; + for (const auto &LHSVal : LHSVals) { + Constant *V = LHSVal.first; Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), V, CmpConst); if (Constant *KC = getKnownConstant(Folded, WantInteger)) - Result.push_back(std::make_pair(KC, LHSVals[i].second)); + Result.push_back(std::make_pair(KC, LHSVal.second)); } return !Result.empty(); @@ -637,8 +632,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, if ((TrueVal || FalseVal) && ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds, WantInteger, CxtI)) { - for (unsigned i = 0, e = Conds.size(); i != e; ++i) { - Constant *Cond = Conds[i].first; + for (auto &C : Conds) { + Constant *Cond = C.first; // Figure out what value to use for the condition. bool KnownCond; @@ -655,7 +650,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // See if the select has a known constant value for this predecessor. if (Constant *Val = KnownCond ? TrueVal : FalseVal) - Result.push_back(std::make_pair(Val, Conds[i].second)); + Result.push_back(std::make_pair(Val, C.second)); } return !Result.empty(); @@ -665,8 +660,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // If all else fails, see if LVI can figure out a constant value for us. Constant *CI = LVI->getConstant(V, BB, CxtI); if (Constant *KC = getKnownConstant(CI, Preference)) { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Result.push_back(std::make_pair(KC, *PI)); + for (BasicBlock *Pred : predecessors(BB)) + Result.push_back(std::make_pair(KC, Pred)); } return !Result.empty(); @@ -736,6 +731,9 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } } + if (TryToUnfoldSelectInCurrBB(BB)) + return true; + // What kind of constant we're looking for. ConstantPreference Preference = WantInteger; @@ -988,10 +986,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If we got here, the loaded value is transparent through to the start of the // block. Check to see if it is available in any of the predecessor blocks. - for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); - PI != PE; ++PI) { - BasicBlock *PredBB = *PI; - + for (BasicBlock *PredBB : predecessors(LoadBB)) { // If we already scanned this predecessor, skip it. 
if (!PredsScanned.insert(PredBB).second) continue; @@ -1038,13 +1033,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { SmallVector<BasicBlock*, 8> PredsToSplit; SmallPtrSet<BasicBlock*, 8> AvailablePredSet; - for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i) - AvailablePredSet.insert(AvailablePreds[i].first); + for (const auto &AvailablePred : AvailablePreds) + AvailablePredSet.insert(AvailablePred.first); // Add all the unavailable predecessors to the PredsToSplit list. - for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); - PI != PE; ++PI) { - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(LoadBB)) { // If the predecessor is an indirect goto, we can't split the edge. if (isa<IndirectBrInst>(P->getTerminator())) return false; @@ -1129,9 +1122,9 @@ FindMostPopularDest(BasicBlock *BB, // blocks with known and real destinations to threading undef. We'll handle // them later if interesting. DenseMap<BasicBlock*, unsigned> DestPopularity; - for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) - if (PredToDestList[i].second) - DestPopularity[PredToDestList[i].second]++; + for (const auto &PredToDest : PredToDestList) + if (PredToDest.second) + DestPopularity[PredToDest.second]++; // Find the most popular dest. DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin(); @@ -1194,10 +1187,10 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, "ComputeValueKnownInPredecessors returned true with no values"); DEBUG(dbgs() << "IN BB: " << *BB; - for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { + for (const auto &PredValue : PredValues) { dbgs() << " BB '" << BB->getName() << "': FOUND condition = " - << *PredValues[i].first - << " for pred '" << PredValues[i].second->getName() << "'.\n"; + << *PredValue.first + << " for pred '" << PredValue.second->getName() << "'.\n"; }); // Decide what we want to thread through. Convert our list of known values to @@ -1210,8 +1203,8 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; - for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { - BasicBlock *Pred = PredValues[i].second; + for (const auto &PredValue : PredValues) { + BasicBlock *Pred = PredValue.second; if (!SeenPreds.insert(Pred).second) continue; // Duplicate predecessor entry. @@ -1220,7 +1213,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (isa<IndirectBrInst>(Pred->getTerminator())) continue; - Constant *Val = PredValues[i].first; + Constant *Val = PredValue.first; BasicBlock *DestBB; if (isa<UndefValue>(Val)) @@ -1260,16 +1253,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // Now that we know what the most popular destination is, factor all // predecessors that will jump to it into a single predecessor. SmallVector<BasicBlock*, 16> PredsToFactor; - for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) - if (PredToDestList[i].second == MostPopularDest) { - BasicBlock *Pred = PredToDestList[i].first; + for (const auto &PredToDest : PredToDestList) + if (PredToDest.second == MostPopularDest) { + BasicBlock *Pred = PredToDest.first; // This predecessor may be a switch or something else that has multiple // edges to the block. Factor each of these edges by listing them // according to # occurrences in PredsToFactor. 
- TerminatorInst *PredTI = Pred->getTerminator(); - for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i) - if (PredTI->getSuccessor(i) == BB) + for (BasicBlock *Succ : successors(Pred)) + if (Succ == BB) PredsToFactor.push_back(Pred); } @@ -1366,11 +1358,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // Scan the information to see which is most popular: true or false. The // predecessors can be of the set true, false, or undef. unsigned NumTrue = 0, NumFalse = 0; - for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { - if (isa<UndefValue>(XorOpValues[i].first)) + for (const auto &XorOpValue : XorOpValues) { + if (isa<UndefValue>(XorOpValue.first)) // Ignore undefs for the count. continue; - if (cast<ConstantInt>(XorOpValues[i].first)->isZero()) + if (cast<ConstantInt>(XorOpValue.first)->isZero()) ++NumFalse; else ++NumTrue; @@ -1386,12 +1378,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // Collect all of the blocks that this can be folded into so that we can // factor this once and clone it once. SmallVector<BasicBlock*, 8> BlocksToFoldInto; - for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { - if (XorOpValues[i].first != SplitVal && - !isa<UndefValue>(XorOpValues[i].first)) + for (const auto &XorOpValue : XorOpValues) { + if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first)) continue; - BlocksToFoldInto.push_back(XorOpValues[i].second); + BlocksToFoldInto.push_back(XorOpValue.second); } // If we inferred a value for all of the predecessors, then duplication won't @@ -1543,10 +1534,10 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // PHI insertion, of which we are prepared to do, clean these up now. SSAUpdater SSAUpdate; SmallVector<Use*, 16> UsesToRename; - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + for (Instruction &I : *BB) { // Scan all uses of this instruction to see if it is used outside of its // block, and if so, record them in UsesToRename. - for (Use &U : I->uses()) { + for (Use &U : I.uses()) { Instruction *User = cast<Instruction>(U.getUser()); if (PHINode *UserPN = dyn_cast<PHINode>(User)) { if (UserPN->getIncomingBlock(U) == BB) @@ -1561,14 +1552,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, if (UsesToRename.empty()) continue; - DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I->getType(), I->getName()); - SSAUpdate.AddAvailableValue(BB, &*I); - SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]); + SSAUpdate.Initialize(I.getType(), I.getName()); + SSAUpdate.AddAvailableValue(BB, &I); + SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]); while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); @@ -1644,10 +1635,10 @@ void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, // Collect updated outgoing edges' frequencies from BB and use them to update // edge probabilities. SmallVector<uint64_t, 4> BBSuccFreq; - for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - auto SuccFreq = (*I == SuccBB) + for (BasicBlock *Succ : successors(BB)) { + auto SuccFreq = (Succ == SuccBB) ? 
BB2SuccBBFreq - NewBBFreq - : BBOrigFreq * BPI->getEdgeProbability(BB, *I); + : BBOrigFreq * BPI->getEdgeProbability(BB, Succ); BBSuccFreq.push_back(SuccFreq.getFrequency()); } @@ -1783,10 +1774,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // PHI insertion, of which we are prepared to do, clean these up now. SSAUpdater SSAUpdate; SmallVector<Use*, 16> UsesToRename; - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + for (Instruction &I : *BB) { // Scan all uses of this instruction to see if it is used outside of its // block, and if so, record them in UsesToRename. - for (Use &U : I->uses()) { + for (Use &U : I.uses()) { Instruction *User = cast<Instruction>(U.getUser()); if (PHINode *UserPN = dyn_cast<PHINode>(User)) { if (UserPN->getIncomingBlock(U) == BB) @@ -1801,14 +1792,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, if (UsesToRename.empty()) continue; - DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I->getType(), I->getName()); - SSAUpdate.AddAvailableValue(BB, &*I); - SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]); + SSAUpdate.Initialize(I.getType(), I.getName()); + SSAUpdate.AddAvailableValue(BB, &I); + SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]); while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); @@ -1903,3 +1894,62 @@ bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { } return false; } + +/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form +/// bb: +/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ... +/// %s = select p, trueval, falseval +/// +/// And expand the select into a branch structure. This later enables +/// jump-threading over bb in this pass. +/// +/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold +/// select if the associated PHI has at least one constant. If the unfolded +/// select is not jump-threaded, it will be folded again in the later +/// optimizations. +bool JumpThreading::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { + // If threading this would thread across a loop header, don't thread the edge. + // See the comments above FindLoopHeaders for justifications and caveats. + if (LoopHeaders.count(BB)) + return false; + + // Look for a Phi/Select pair in the same basic block. The Phi feeds the + // condition of the Select and at least one of the incoming values is a + // constant. + for (BasicBlock::iterator BI = BB->begin(); + PHINode *PN = dyn_cast<PHINode>(BI); ++BI) { + unsigned NumPHIValues = PN->getNumIncomingValues(); + if (NumPHIValues == 0 || !PN->hasOneUse()) + continue; + + SelectInst *SI = dyn_cast<SelectInst>(PN->user_back()); + if (!SI || SI->getParent() != BB) + continue; + + Value *Cond = SI->getCondition(); + if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1)) + continue; + + bool HasConst = false; + for (unsigned i = 0; i != NumPHIValues; ++i) { + if (PN->getIncomingBlock(i) == BB) + return false; + if (isa<ConstantInt>(PN->getIncomingValue(i))) + HasConst = true; + } + + if (HasConst) { + // Expand the select. 
+ TerminatorInst *Term = + SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); + NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); + NewPN->addIncoming(SI->getFalseValue(), BB); + SI->replaceAllUsesWith(NewPN); + SI->eraseFromParent(); + return true; + } + } + + return false; +} diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index e01e23f..8923ff7 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -203,9 +203,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { CurAST = new AliasSetTracker(*AA); // Collect Alias info from subloops. - for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); - LoopItr != LoopItrE; ++LoopItr) { - Loop *InnerL = *LoopItr; + for (Loop *InnerL : L->getSubLoops()) { AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; assert(InnerAST && "Where is my AST?"); @@ -227,9 +225,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Because subloops have already been incorporated into AST, we skip blocks in // subloops. // - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; + for (BasicBlock *BB : L->blocks()) { if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. CurAST->add(*BB); // Incorporate the specified basic block } @@ -263,9 +259,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { PredIteratorCache PIC; // Loop over all of the alias sets in the tracker object. - for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); - I != E; ++I) - Changed |= promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts, + for (AliasSet &AS : *CurAST) + Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT, CurLoop, CurAST, &SafetyInfo); @@ -324,9 +319,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // We are processing blocks in reverse dfo, so process children first. const std::vector<DomTreeNode*> &Children = N->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= - sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + for (DomTreeNode *Child : Children) + Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB,CurLoop,LI)) return Changed; @@ -407,9 +402,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, } const std::vector<DomTreeNode*> &Children = N->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= - hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + for (DomTreeNode *Child : Children) + Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); return Changed; } @@ -499,9 +493,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. 
bool FoundMod = false; - for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); - I != E; ++I) { - AliasSet &AS = *I; + for (AliasSet &AS : *CurAST) { if (!AS.isForwardingAliasSet() && AS.isMod()) { FoundMod = true; break; @@ -783,8 +775,8 @@ static bool isGuaranteedToExecute(const Instruction &Inst, CurLoop->getExitBlocks(ExitBlocks); // Verify that the block dominates each of the exit blocks of the loop. - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (!DT->dominates(Inst.getParent(), ExitBlocks[i])) + for (BasicBlock *ExitBlock : ExitBlocks) + if (!DT->dominates(Inst.getParent(), ExitBlock)) return false; // As a degenerate case, if the loop is statically infinite then we haven't @@ -951,17 +943,17 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, // If there is an non-load/store instruction in the loop, we can't promote // it. - if (const LoadInst *load = dyn_cast<LoadInst>(UI)) { - assert(!load->isVolatile() && "AST broken"); - if (!load->isSimple()) + if (const LoadInst *Load = dyn_cast<LoadInst>(UI)) { + assert(!Load->isVolatile() && "AST broken"); + if (!Load->isSimple()) return Changed; - } else if (const StoreInst *store = dyn_cast<StoreInst>(UI)) { + } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (UI->getOperand(1) != ASIV) continue; - assert(!store->isVolatile() && "AST broken"); - if (!store->isSimple()) + assert(!Store->isVolatile() && "AST broken"); + if (!Store->isSimple()) return Changed; // Don't sink stores from loops without dedicated block exits. Exits // containing indirect branches are not transformed by loop simplify, @@ -979,7 +971,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, // restrictive (and performant) alignment and if we are sure this // instruction will be executed, update the alignment. // Larger is better, with the exception of 0 being the best alignment. - unsigned InstAlignment = store->getAlignment(); + unsigned InstAlignment = Store->getAlignment(); if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0) if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) { GuaranteedToExecute = true; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index bc00ff3..7b1940b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -245,7 +245,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) { loopInfo.removeBlock(BB); // The last step is to update LoopInfo now that we've eliminated this loop. 
- loopInfo.updateUnloop(L); + loopInfo.markAsRemoved(L); Changed = true; ++NumDeleted; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 56ae5c0..ecef6db 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -39,16 +39,16 @@ using namespace llvm; #define DEBUG_TYPE "loop-unroll" static cl::opt<unsigned> - UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, + UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The baseline cost threshold for loop unrolling")); static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold( - "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden, + "unroll-percent-dynamic-cost-saved-threshold", cl::Hidden, cl::desc("The percentage of estimated dynamic cost which must be saved by " "unrolling to allow unrolling up to the max threshold.")); static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount( - "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden, + "unroll-dynamic-cost-savings-discount", cl::Hidden, cl::desc("This is the amount discounted from the total unroll cost when " "the unrolled form has a high dynamic cost savings (triggered by " "the '-unroll-perecent-dynamic-cost-saved-threshold' flag).")); @@ -59,17 +59,17 @@ static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze( "iterations when checking full unroll profitability")); static cl::opt<unsigned> -UnrollCount("unroll-count", cl::init(0), cl::Hidden, +UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes")); static cl::opt<bool> -UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, +UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached.")); static cl::opt<bool> -UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden, +UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts")); static cl::opt<unsigned> @@ -77,182 +77,95 @@ PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma.")); -namespace { - class LoopUnroll : public LoopPass { - public: - static char ID; // Pass ID, replacement for typeid - LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) { - CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); - CurrentPercentDynamicCostSavedThreshold = - UnrollPercentDynamicCostSavedThreshold; - CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount; - CurrentCount = (C == -1) ? UnrollCount : unsigned(C); - CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; - CurrentRuntime = (R == -1) ? 
UnrollRuntime : (bool)R; - - UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); - UserPercentDynamicCostSavedThreshold = - (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0); - UserDynamicCostSavingsDiscount = - (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0); - UserAllowPartial = (P != -1) || - (UnrollAllowPartial.getNumOccurrences() > 0); - UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0); - UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0); - - initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); - } - /// A magic value for use with the Threshold parameter to indicate - /// that the loop unroll should be performed regardless of how much - /// code expansion would result. - static const unsigned NoThreshold = UINT_MAX; - - // Threshold to use when optsize is specified (and there is no - // explicit -unroll-threshold). - static const unsigned OptSizeUnrollThreshold = 50; - - // Default unroll count for loops with run-time trip count if - // -unroll-count is not set - static const unsigned UnrollRuntimeCount = 8; - - unsigned CurrentCount; - unsigned CurrentThreshold; - unsigned CurrentPercentDynamicCostSavedThreshold; - unsigned CurrentDynamicCostSavingsDiscount; - bool CurrentAllowPartial; - bool CurrentRuntime; - - // Flags for whether the 'current' settings are user-specified. - bool UserCount; - bool UserThreshold; - bool UserPercentDynamicCostSavedThreshold; - bool UserDynamicCostSavingsDiscount; - bool UserAllowPartial; - bool UserRuntime; - - bool runOnLoop(Loop *L, LPPassManager &) override; - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG... - /// - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addPreservedID(LCSSAID); - AU.addRequired<ScalarEvolutionWrapperPass>(); - AU.addPreserved<ScalarEvolutionWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. - // If loop unroll does not preserve dom info then LCSSA pass on next - // loop will receive invalid dom info. - // For now, recreate dom info, if loop is unrolled. - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } +/// A magic value for use with the Threshold parameter to indicate +/// that the loop unroll should be performed regardless of how much +/// code expansion would result. +static const unsigned NoThreshold = UINT_MAX; - // Fill in the UnrollingPreferences parameter with values from the - // TargetTransformationInfo. 
- void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI, - TargetTransformInfo::UnrollingPreferences &UP) { - UP.Threshold = CurrentThreshold; - UP.PercentDynamicCostSavedThreshold = - CurrentPercentDynamicCostSavedThreshold; - UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount; - UP.OptSizeThreshold = OptSizeUnrollThreshold; - UP.PartialThreshold = CurrentThreshold; - UP.PartialOptSizeThreshold = OptSizeUnrollThreshold; - UP.Count = CurrentCount; - UP.MaxCount = UINT_MAX; - UP.Partial = CurrentAllowPartial; - UP.Runtime = CurrentRuntime; - UP.AllowExpensiveTripCount = false; - TTI.getUnrollingPreferences(L, UP); - } +/// Default unroll count for loops with run-time trip count if +/// -unroll-count is not set +static const unsigned DefaultUnrollRuntimeCount = 8; - // Select and return an unroll count based on parameters from - // user, unroll preferences, unroll pragmas, or a heuristic. - // SetExplicitly is set to true if the unroll count is is set by - // the user or a pragma rather than selected heuristically. - unsigned - selectUnrollCount(const Loop *L, unsigned TripCount, bool PragmaFullUnroll, - unsigned PragmaCount, - const TargetTransformInfo::UnrollingPreferences &UP, - bool &SetExplicitly); - - // Select threshold values used to limit unrolling based on a - // total unrolled size. Parameters Threshold and PartialThreshold - // are set to the maximum unrolled size for fully and partially - // unrolled loops respectively. - void selectThresholds(const Loop *L, bool UsePragmaThreshold, - const TargetTransformInfo::UnrollingPreferences &UP, - unsigned &Threshold, unsigned &PartialThreshold, - unsigned &PercentDynamicCostSavedThreshold, - unsigned &DynamicCostSavingsDiscount) { - // Determine the current unrolling threshold. While this is - // normally set from UnrollThreshold, it is overridden to a - // smaller value if the current function is marked as - // optimize-for-size, and the unroll threshold was not user - // specified. - Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; - PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold; - PercentDynamicCostSavedThreshold = - UserPercentDynamicCostSavedThreshold - ? CurrentPercentDynamicCostSavedThreshold - : UP.PercentDynamicCostSavedThreshold; - DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount - ? CurrentDynamicCostSavingsDiscount - : UP.DynamicCostSavingsDiscount; - - if (!UserThreshold && - // FIXME: Use Function::optForSize(). - L->getHeader()->getParent()->hasFnAttribute( - Attribute::OptimizeForSize)) { - Threshold = UP.OptSizeThreshold; - PartialThreshold = UP.PartialOptSizeThreshold; - } - if (UsePragmaThreshold) { - // If the loop has an unrolling pragma, we want to be more - // aggressive with unrolling limits. Set thresholds to at - // least the PragmaTheshold value which is larger than the - // default limits. - if (Threshold != NoThreshold) - Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold); - if (PartialThreshold != NoThreshold) - PartialThreshold = - std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold); - } - } - bool canUnrollCompletely(Loop *L, unsigned Threshold, - unsigned PercentDynamicCostSavedThreshold, - unsigned DynamicCostSavingsDiscount, - uint64_t UnrolledCost, uint64_t RolledDynamicCost); - }; -} +/// Gather the various unrolling parameters based on the defaults, compiler +/// flags, TTI overrides, pragmas, and user specified parameters. 
+static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( + Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold, + Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, + Optional<bool> UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll, + bool PragmaEnableUnroll, unsigned TripCount) { + TargetTransformInfo::UnrollingPreferences UP; -char LoopUnroll::ID = 0; -INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopSimplify) -INITIALIZE_PASS_DEPENDENCY(LCSSA) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) + // Set up the defaults + UP.Threshold = 150; + UP.PercentDynamicCostSavedThreshold = 20; + UP.DynamicCostSavingsDiscount = 2000; + UP.OptSizeThreshold = 50; + UP.PartialThreshold = UP.Threshold; + UP.PartialOptSizeThreshold = UP.OptSizeThreshold; + UP.Count = 0; + UP.MaxCount = UINT_MAX; + UP.Partial = false; + UP.Runtime = false; + UP.AllowExpensiveTripCount = false; + + // Override with any target specific settings + TTI.getUnrollingPreferences(L, UP); + + // Apply size attributes + if (L->getHeader()->getParent()->optForSize()) { + UP.Threshold = UP.OptSizeThreshold; + UP.PartialThreshold = UP.PartialOptSizeThreshold; + } -Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial, - int Runtime) { - return new LoopUnroll(Threshold, Count, AllowPartial, Runtime); -} + // Apply unroll count pragmas + if (PragmaCount) + UP.Count = PragmaCount; + else if (PragmaFullUnroll) + UP.Count = TripCount; -Pass *llvm::createSimpleLoopUnrollPass() { - return llvm::createLoopUnrollPass(-1, -1, 0, 0); + // Apply any user values specified by cl::opt + if (UnrollThreshold.getNumOccurrences() > 0) { + UP.Threshold = UnrollThreshold; + UP.PartialThreshold = UnrollThreshold; + } + if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0) + UP.PercentDynamicCostSavedThreshold = + UnrollPercentDynamicCostSavedThreshold; + if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0) + UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount; + if (UnrollCount.getNumOccurrences() > 0) + UP.Count = UnrollCount; + if (UnrollAllowPartial.getNumOccurrences() > 0) + UP.Partial = UnrollAllowPartial; + if (UnrollRuntime.getNumOccurrences() > 0) + UP.Runtime = UnrollRuntime; + + // Apply user values provided by argument + if (UserThreshold.hasValue()) { + UP.Threshold = *UserThreshold; + UP.PartialThreshold = *UserThreshold; + } + if (UserCount.hasValue()) + UP.Count = *UserCount; + if (UserAllowPartial.hasValue()) + UP.Partial = *UserAllowPartial; + if (UserRuntime.hasValue()) + UP.Runtime = *UserRuntime; + + if (PragmaCount > 0 || + ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) { + // If the loop has an unrolling pragma, we want to be more aggressive with + // unrolling limits. Set thresholds to at least the PragmaTheshold value + // which is larger than the default limits. 
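Aside (illustration, not part of the patch): the new gatherUnrollingPreferences() above applies its settings in a fixed order — built-in defaults, then TTI overrides, then the optsize attribute, then pragma values, then the -unroll-* command-line flags, then explicit caller arguments — so a later source simply overwrites an earlier one, and the pragma threshold only raises the limit at the end. A minimal standalone sketch of that "later source wins" layering, using invented names and no LLVM dependencies:

// Hypothetical, self-contained illustration of the precedence logic; not the
// LLVM implementation.
#include <algorithm>
#include <cstdio>
#include <optional>

struct Prefs {
  unsigned Threshold = 150; // built-in default
};

Prefs gatherPrefs(std::optional<unsigned> FlagThreshold, // e.g. a -unroll-threshold flag
                  std::optional<unsigned> UserThreshold, // explicit caller argument
                  bool HasUnrollPragma, unsigned PragmaThreshold) {
  Prefs P;                                           // 1. defaults
  // 2. a target hook would adjust P here
  if (FlagThreshold) P.Threshold = *FlagThreshold;   // 3. command-line flag
  if (UserThreshold) P.Threshold = *UserThreshold;   // 4. caller argument wins over the flag
  if (HasUnrollPragma)                               // 5. a pragma only raises the limit
    P.Threshold = std::max(P.Threshold, PragmaThreshold);
  return P;
}

int main() {
  Prefs P = gatherPrefs(200, std::nullopt, true, 16 * 1024);
  std::printf("threshold = %u\n", P.Threshold); // prints 16384
  return 0;
}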
+ if (UP.Threshold != NoThreshold) + UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold); + if (UP.PartialThreshold != NoThreshold) + UP.PartialThreshold = + std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold); + } + + return UP; } namespace { @@ -793,12 +706,11 @@ static void SetLoopAlreadyUnrolled(Loop *L) { L->setLoopID(NewLoopID); } -bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold, - unsigned PercentDynamicCostSavedThreshold, - unsigned DynamicCostSavingsDiscount, - uint64_t UnrolledCost, - uint64_t RolledDynamicCost) { - +static bool canUnrollCompletely(Loop *L, unsigned Threshold, + unsigned PercentDynamicCostSavedThreshold, + unsigned DynamicCostSavingsDiscount, + uint64_t UnrolledCost, + uint64_t RolledDynamicCost) { if (Threshold == NoThreshold) { DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n"); return true; @@ -846,60 +758,13 @@ bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold, return false; } -unsigned LoopUnroll::selectUnrollCount( - const Loop *L, unsigned TripCount, bool PragmaFullUnroll, - unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP, - bool &SetExplicitly) { - SetExplicitly = true; - - // User-specified count (either as a command-line option or - // constructor parameter) has highest precedence. - unsigned Count = UserCount ? CurrentCount : 0; - - // If there is no user-specified count, unroll pragmas have the next - // highest precedence. - if (Count == 0) { - if (PragmaCount) { - Count = PragmaCount; - } else if (PragmaFullUnroll) { - Count = TripCount; - } - } - - if (Count == 0) - Count = UP.Count; - - if (Count == 0) { - SetExplicitly = false; - if (TripCount == 0) - // Runtime trip count. - Count = UnrollRuntimeCount; - else - // Conservative heuristic: if we know the trip count, see if we can - // completely unroll (subject to the threshold, checked below); otherwise - // try to find greatest modulo of the trip count which is still under - // threshold value. 
- Count = TripCount; - } - if (TripCount && Count > TripCount) - return TripCount; - return Count; -} - -bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { - if (skipOptnoneFunction(L)) - return false; - - Function &F = *L->getHeader()->getParent(); - - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - const TargetTransformInfo &TTI = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - +static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, + ScalarEvolution *SE, const TargetTransformInfo &TTI, + AssumptionCache &AC, bool PreserveLCSSA, + Optional<unsigned> ProvidedCount, + Optional<unsigned> ProvidedThreshold, + Optional<bool> ProvidedAllowPartial, + Optional<bool> ProvidedRuntime) { BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() << "] Loop %" << Header->getName() << "\n"); @@ -912,9 +777,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { unsigned PragmaCount = UnrollCountPragmaValue(L); bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0; - TargetTransformInfo::UnrollingPreferences UP; - getUnrollingPreferences(L, TTI, UP); - // Find trip count and trip multiple if count is not available unsigned TripCount = 0; unsigned TripMultiple = 1; @@ -929,11 +791,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); } - // Select an initial unroll count. This may be reduced later based - // on size thresholds. - bool CountSetExplicitly; - unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll, - PragmaCount, UP, CountSetExplicitly); + TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( + L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, + ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll, + TripCount); + + unsigned Count = UP.Count; + bool CountSetExplicitly = Count != 0; + // Use a heuristic count if we didn't set anything explicitly. + if (!CountSetExplicitly) + Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount; + if (TripCount && Count > TripCount) + Count = TripCount; unsigned NumInlineCandidates; bool notDuplicatable; @@ -955,21 +824,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { return false; } - unsigned Threshold, PartialThreshold; - unsigned PercentDynamicCostSavedThreshold; - unsigned DynamicCostSavingsDiscount; - // Only use the high pragma threshold when we have a target unroll factor such - // as with "#pragma unroll N" or a pragma indicating full unrolling and the - // trip count is known. Otherwise we rely on the standard threshold to - // heuristically select a reasonable unroll count. - bool UsePragmaThreshold = - PragmaCount > 0 || - ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0); - - selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold, - PercentDynamicCostSavedThreshold, - DynamicCostSavingsDiscount); - // Given Count, TripCount and thresholds determine the type of // unrolling which is to be performed. 
enum { Full = 0, Partial = 1, Runtime = 2 }; @@ -977,19 +831,20 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { if (TripCount && Count == TripCount) { Unrolling = Partial; // If the loop is really small, we don't need to run an expensive analysis. - if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount, + if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount, UnrolledSize, UnrolledSize)) { Unrolling = Full; } else { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. - if (Optional<EstimatedUnrollCost> Cost = - analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI, - Threshold + DynamicCostSavingsDiscount)) - if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold, - DynamicCostSavingsDiscount, Cost->UnrolledCost, - Cost->RolledDynamicCost)) { + if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( + L, TripCount, DT, *SE, TTI, + UP.Threshold + UP.DynamicCostSavingsDiscount)) + if (canUnrollCompletely(L, UP.Threshold, + UP.PercentDynamicCostSavedThreshold, + UP.DynamicCostSavingsDiscount, + Cost->UnrolledCost, Cost->RolledDynamicCost)) { Unrolling = Full; } } @@ -1001,23 +856,22 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; - bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || - (UserRuntime ? CurrentRuntime : UP.Runtime); + bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime; // Don't unroll a runtime trip count loop with unroll full pragma. if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { AllowRuntime = false; } if (Unrolling == Partial) { - bool AllowPartial = PragmaEnableUnroll || - (UserAllowPartial ? CurrentAllowPartial : UP.Partial); + bool AllowPartial = PragmaEnableUnroll || UP.Partial; if (!AllowPartial && !CountSetExplicitly) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } - if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) { + if (UP.PartialThreshold != NoThreshold && + UnrolledSize > UP.PartialThreshold) { // Reduce unroll count to be modulo of TripCount for partial unrolling. - Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2); + Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2); while (Count != 0 && TripCount % Count != 0) Count--; } @@ -1029,7 +883,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { } // Reduce unroll count to be the largest power-of-two factor of // the original count which satisfies the threshold limit. 
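Aside (illustration, not part of the patch): the partial-unroll clamping in this hunk is driven by the size model visible above — an unrolled body is estimated as (LoopSize - 2) * Count + 2 — so the count is first derived from the threshold and then lowered until it evenly divides the trip count. A standalone sketch of that arithmetic with made-up numbers:

// Hypothetical numbers; mirrors only the arithmetic shown in the hunk above,
// not the LLVM implementation.
#include <algorithm>
#include <cstdio>

int main() {
  unsigned LoopSize = 12;          // estimated size of one loop body
  unsigned PartialThreshold = 150; // size budget for the unrolled body
  unsigned TripCount = 30;

  // Largest count whose unrolled size (LoopSize - 2) * Count + 2 fits the budget.
  unsigned Count = (std::max(PartialThreshold, 3u) - 2) / (LoopSize - 2); // 148 / 10 = 14

  // Lower the count until it divides the trip count, so no remainder iterations are left.
  while (Count != 0 && TripCount % Count != 0)
    Count--;                                            // 14 -> 10

  unsigned UnrolledSize = (LoopSize - 2) * Count + 2;   // 102, within the 150 budget
  std::printf("count = %u, unrolled size = %u\n", Count, UnrolledSize);
  return 0;
}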
- while (Count != 0 && UnrolledSize > PartialThreshold) { + while (Count != 0 && UnrolledSize > UP.PartialThreshold) { Count >>= 1; UnrolledSize = (LoopSize-2) * Count + 2; } @@ -1086,3 +940,91 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { return true; } + +namespace { +class LoopUnroll : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + LoopUnroll(Optional<unsigned> Threshold = None, + Optional<unsigned> Count = None, + Optional<bool> AllowPartial = None, Optional<bool> Runtime = None) + : LoopPass(ID), ProvidedCount(Count), ProvidedThreshold(Threshold), + ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime) { + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); + } + + Optional<unsigned> ProvidedCount; + Optional<unsigned> ProvidedThreshold; + Optional<bool> ProvidedAllowPartial; + Optional<bool> ProvidedRuntime; + + bool runOnLoop(Loop *L, LPPassManager &) override { + if (skipOptnoneFunction(L)) + return false; + + Function &F = *L->getHeader()->getParent(); + + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + + return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount, + ProvidedThreshold, ProvidedAllowPartial, + ProvidedRuntime); + } + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG... + /// + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. + // If loop unroll does not preserve dom info then LCSSA pass on next + // loop will receive invalid dom info. + // For now, recreate dom info, if loop is unrolled. + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; +} + +char LoopUnroll::ID = 0; +INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) + +Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial, + int Runtime) { + // TODO: It would make more sense for this function to take the optionals + // directly, but that's dangerous since it would silently break out of tree + // callers. + return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold), + Count == -1 ? 
None : Optional<unsigned>(Count), + AllowPartial == -1 ? None + : Optional<bool>(AllowPartial), + Runtime == -1 ? None : Optional<bool>(Runtime)); +} + +Pass *llvm::createSimpleLoopUnrollPass() { + return llvm::createLoopUnrollPass(-1, -1, 0, 0); +} diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 7354016..6b43b0f 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -529,12 +529,13 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // We found an instruction that may write to the loaded memory. // We can try to promote at this position instead of the store - // position if nothing alias the store memory after this. + // position if nothing alias the store memory after this and the store + // destination is not in the range. P = &*I; for (; I != E; ++I) { MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) { - DEBUG(dbgs() << "Alias " << *I << "\n"); + if (&*I == SI->getOperand(1) || + AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) { P = nullptr; break; } @@ -628,13 +629,39 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // Ensure that the value being stored is something that can be memset'able a // byte at a time like "0" or "-1" or any width, as well as things like // 0xA0A0A0A0 and 0.0. - if (Value *ByteVal = isBytewiseValue(SI->getOperand(0))) + auto *V = SI->getOperand(0); + if (Value *ByteVal = isBytewiseValue(V)) { if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) { BBI = I->getIterator(); // Don't invalidate iterator. return true; } + // If we have an aggregate, we try to promote it to memset regardless + // of opportunity for merging as it can expose optimization opportunities + // in subsequent passes. + auto *T = V->getType(); + if (T->isAggregateType()) { + uint64_t Size = DL.getTypeStoreSize(T); + unsigned Align = SI->getAlignment(); + if (!Align) + Align = DL.getABITypeAlignment(T); + IRBuilder<> Builder(SI); + auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, + Size, Align, SI->isVolatile()); + + DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); + + MD->removeInstruction(SI); + SI->eraseFromParent(); + NumMemSetInfer++; + + // Make sure we do not invalidate the iterator. 
+ BBI = M->getIterator(); + return true; + } + } + return false; } diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp index 28c610c..b56b355 100644 --- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -499,7 +499,7 @@ static bool isGCSafepointPoll(Function &F) { static bool shouldRewriteFunction(Function &F) { // TODO: This should check the GCStrategy if (F.hasGC()) { - const char *FunctionGCName = F.getGC(); + const auto &FunctionGCName = F.getGC(); const StringRef StatepointExampleName("statepoint-example"); const StringRef CoreCLRName("coreclr"); return (StatepointExampleName == FunctionGCName) || diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index 401a740..bcadd4e 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -2155,7 +2155,8 @@ void Reassociate::OptimizeInst(Instruction *I) { // During the initial run we will get to the root of the tree. // But if we get here while we are redoing instructions, there is no // guarantee that the root will be visited. So Redo later - if (BO->user_back() != BO) + if (BO->user_back() != BO && + BO->getParent() == BO->user_back()->getParent()) RedoInsts.insert(BO->user_back()); return; } } diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 5d253be..d77d574 100644 --- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -78,6 +78,13 @@ static cl::opt<bool> AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info", cl::Hidden, cl::init(true)); +/// Should we split vectors of pointers into their individual elements? This +/// is known to be buggy, but the alternate implementation isn't yet ready. +/// This is purely to provide a debugging and diagnostic hook until the vector +/// split is replaced with vector relocations. +static cl::opt<bool> UseVectorSplit("rs4gc-split-vector-values", cl::Hidden, + cl::init(true)); + namespace { struct RewriteStatepointsForGC : public ModulePass { static char ID; // Pass identification, replacement for typeid @@ -357,10 +364,6 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I); /// particular element in 'I'. static BaseDefiningValueResult findBaseDefiningValueOfVector(Value *I) { - assert(I->getType()->isVectorTy() && - cast<VectorType>(I->getType())->getElementType()->isPointerTy() && - "Illegal to ask for the base pointer of a non-pointer type"); - // Each case parallels findBaseDefiningValue below, see that code for // detailed motivation. @@ -368,26 +371,10 @@ findBaseDefiningValueOfVector(Value *I) { // An incoming argument to the function is a base pointer return BaseDefiningValueResult(I, true); - // We shouldn't see the address of a global as a vector value? - assert(!isa<GlobalVariable>(I) && - "unexpected global variable found in base of vector"); - - // inlining could possibly introduce phi node that contains - // undef if callee has multiple returns - if (isa<UndefValue>(I)) - // utterly meaningless, but useful for dealing with partially optimized - // code. + if (isa<Constant>(I)) + // Constant vectors consist only of constant pointers.
return BaseDefiningValueResult(I, true); - // Due to inheritance, this must be _after_ the global variable and undef - // checks - if (Constant *Con = dyn_cast<Constant>(I)) { - assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && - "order of checks wrong!"); - assert(Con->isNullValue() && "null is the only case which makes sense"); - return BaseDefiningValueResult(Con, true); - } - if (isa<LoadInst>(I)) return BaseDefiningValueResult(I, true); @@ -417,11 +404,11 @@ findBaseDefiningValueOfVector(Value *I) { /// (i.e. a PHI or Select of two derived pointers), or c) involves a change /// from pointer to vector type or back. static BaseDefiningValueResult findBaseDefiningValue(Value *I) { + assert(I->getType()->isPtrOrPtrVectorTy() && + "Illegal to ask for the base pointer of a non-pointer type"); + if (I->getType()->isVectorTy()) return findBaseDefiningValueOfVector(I); - - assert(I->getType()->isPointerTy() && - "Illegal to ask for the base pointer of a non-pointer type"); if (isa<Argument>(I)) // An incoming argument to the function is a base pointer @@ -1310,18 +1297,29 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables, assert(Index < LiveVec.size() && "Bug in std::find?"); return Index; }; - - // All gc_relocate are set to i8 addrspace(1)* type. We originally generated - // unique declarations for each pointer type, but this proved problematic - // because the intrinsic mangling code is incomplete and fragile. Since - // we're moving towards a single unified pointer type anyways, we can just - // cast everything to an i8* of the right address space. A bitcast is added - // later to convert gc_relocate to the actual value's type. Module *M = StatepointToken->getModule(); - auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace(); - Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)}; - Value *GCRelocateDecl = - Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types); + + // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose + // element type is i8 addrspace(1)*). We originally generated unique + // declarations for each pointer type, but this proved problematic because + // the intrinsic mangling code is incomplete and fragile. Since we're moving + // towards a single unified pointer type anyways, we can just cast everything + // to an i8* of the right address space. A bitcast is added later to convert + // gc_relocate to the actual value's type. + auto getGCRelocateDecl = [&] (Type *Ty) { + assert(isHandledGCPointerType(Ty)); + auto AS = Ty->getScalarType()->getPointerAddressSpace(); + Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS); + if (auto *VT = dyn_cast<VectorType>(Ty)) + NewTy = VectorType::get(NewTy, VT->getNumElements()); + return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, + {NewTy}); + }; + + // Lazily populated map from input types to the canonicalized form mentioned + // in the comment above. This should probably be cached somewhere more + // broadly. 
+ DenseMap<Type*, Value*> TypeToDeclMap; for (unsigned i = 0; i < LiveVariables.size(); i++) { // Generate the gc.relocate call and save the result @@ -1329,6 +1327,11 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables, Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i])); Value *LiveIdx = Builder.getInt32(LiveStart + i); + Type *Ty = LiveVariables[i]->getType(); + if (!TypeToDeclMap.count(Ty)) + TypeToDeclMap[Ty] = getGCRelocateDecl(Ty); + Value *GCRelocateDecl = TypeToDeclMap[Ty]; + // only specify a debug name if we can give a useful one CallInst *Reloc = Builder.CreateCall( GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx}, @@ -2380,15 +2383,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // Do a limited scalarization of any live at safepoint vector values which // contain pointers. This enables this pass to run after vectorization at - // the cost of some possible performance loss. TODO: it would be nice to - // natively support vectors all the way through the backend so we don't need - // to scalarize here. - for (size_t i = 0; i < Records.size(); i++) { - PartiallyConstructedSafepointRecord &Info = Records[i]; - Instruction *Statepoint = ToUpdate[i].getInstruction(); - splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet, - Info.PointerToBase, DT); - } + // the cost of some possible performance loss. Note: This is known to not + // handle updating of the side tables correctly which can lead to relocation + // bugs when the same vector is live at multiple statepoints. We're in the + // process of implementing the alternate lowering - relocating the + // vector-of-pointers as first class item and updating the backend to + // understand that - but that's not yet complete. + if (UseVectorSplit) + for (size_t i = 0; i < Records.size(); i++) { + PartiallyConstructedSafepointRecord &Info = Records[i]; + Instruction *Statepoint = ToUpdate[i].getInstruction(); + splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet, + Info.PointerToBase, DT); + } // In order to reduce live set of statepoint we might choose to rematerialize // some values instead of relocating them. 
This is purely an optimization and @@ -2467,7 +2474,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, #ifndef NDEBUG // sanity check for (auto *Ptr : Live) - assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type"); + assert(isHandledGCPointerType(Ptr->getType()) && + "must be a gc pointer type"); #endif relocationViaAlloca(F, DT, Live, Records); @@ -2547,7 +2555,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { static bool shouldRewriteStatepointsIn(Function &F) { // TODO: This should check the GCStrategy if (F.hasGC()) { - const char *FunctionGCName = F.getGC(); + const auto &FunctionGCName = F.getGC(); const StringRef StatepointExampleName("statepoint-example"); const StringRef CoreCLRName("coreclr"); return (StatepointExampleName == FunctionGCName) || diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index 2fca803..8569e08 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -757,9 +757,14 @@ void SCCPSolver::visitCastInst(CastInst &I) { LatticeVal OpSt = getValueState(I.getOperand(0)); if (OpSt.isOverdefined()) // Inherit overdefinedness of operand markOverdefined(&I); - else if (OpSt.isConstant()) // Propagate constant value - markConstant(&I, ConstantExpr::getCast(I.getOpcode(), - OpSt.getConstant(), I.getType())); + else if (OpSt.isConstant()) { + Constant *C = + ConstantExpr::getCast(I.getOpcode(), OpSt.getConstant(), I.getType()); + if (isa<UndefValue>(C)) + return; + // Propagate constant value + markConstant(&I, C); + } } @@ -859,10 +864,14 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { LatticeVal &IV = ValueState[&I]; if (IV.isOverdefined()) return; - if (V1State.isConstant() && V2State.isConstant()) - return markConstant(IV, &I, - ConstantExpr::get(I.getOpcode(), V1State.getConstant(), - V2State.getConstant())); + if (V1State.isConstant() && V2State.isConstant()) { + Constant *C = ConstantExpr::get(I.getOpcode(), V1State.getConstant(), + V2State.getConstant()); + // X op Y -> undef. + if (isa<UndefValue>(C)) + return; + return markConstant(IV, &I, C); + } // If something is undef, wait for it to resolve. if (!V1State.isOverdefined() && !V2State.isOverdefined()) @@ -917,10 +926,13 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { LatticeVal &IV = ValueState[&I]; if (IV.isOverdefined()) return; - if (V1State.isConstant() && V2State.isConstant()) - return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(), - V1State.getConstant(), - V2State.getConstant())); + if (V1State.isConstant() && V2State.isConstant()) { + Constant *C = ConstantExpr::getCompare( + I.getPredicate(), V1State.getConstant(), V2State.getConstant()); + if (isa<UndefValue>(C)) + return; + return markConstant(IV, &I, C); + } // If operands are still undefined, wait for it to resolve. 
if (!V1State.isOverdefined() && !V2State.isOverdefined()) @@ -1020,8 +1032,11 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { Constant *Ptr = Operands[0]; auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end()); - markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, - Indices)); + Constant *C = + ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices); + if (isa<UndefValue>(C)) + return; + markConstant(&I, C); } void SCCPSolver::visitStoreInst(StoreInst &SI) { @@ -1061,9 +1076,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) { Constant *Ptr = PtrVal.getConstant(); - // load null -> null + // load null is undefined. if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0) - return markConstant(IV, &I, UndefValue::get(I.getType())); + return; // Transform load (constant global) into the value loaded. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { @@ -1079,8 +1094,11 @@ void SCCPSolver::visitLoadInst(LoadInst &I) { } // Transform load from a constant into a constant if possible. - if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) + if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) { + if (isa<UndefValue>(C)) + return; return markConstant(IV, &I, C); + } // Otherwise we cannot say for certain what value this load will produce. // Bail out. @@ -1122,8 +1140,12 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands, TLI)) + if (Constant *C = ConstantFoldCall(F, Operands, TLI)) { + // call -> undef. + if (isa<UndefValue>(C)) + return; return markConstant(I, C); + } } // Otherwise, we don't know anything about this call, mark it overdefined. @@ -1379,6 +1401,11 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // X % undef -> undef. No change. if (Op1LV.isUndefined()) break; + // X / 0 -> undef. No change. + // X % 0 -> undef. No change. + if (Op1LV.isConstant() && Op1LV.getConstant()->isZeroValue()) + break; + // undef / X -> 0. X could be maxint. // undef % X -> 0. X could be 1. markForcedConstant(&I, Constant::getNullValue(ITy)); diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 0e0b00d..4e84d72 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -425,9 +425,7 @@ bool TailCallElim::runTRE(Function &F) { // with themselves. Check to see if we did and clean up our mess if so. This // occurs when a function passes an argument straight through to its tail // call. - for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) { - PHINode *PN = ArgumentPHIs[i]; - + for (PHINode *PN : ArgumentPHIs) { // If the PHI Node is a dynamic constant, replace it with the value it is. if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) { PN->replaceAllUsesWith(PNV); @@ -468,10 +466,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // return value of the call, it must only use things that are defined before // the call, or movable instructions between the call and the instruction // itself. 
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (I->getOperand(i) == CI) - return false; - return true; + return std::find(I->op_begin(), I->op_end(), CI) == I->op_end(); } /// Return true if the specified value is the same when the return would exit diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index a5137e9..72db980 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -626,11 +626,17 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, Clone2->setName(Twine("lpad") + Suffix2); NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); - // Create a PHI node for the two cloned landingpad instructions. - PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); - PN->addIncoming(Clone1, NewBB1); - PN->addIncoming(Clone2, NewBB2); - LPad->replaceAllUsesWith(PN); + // Create a PHI node for the two cloned landingpad instructions only + // if the original landingpad instruction has some uses. + if (!LPad->use_empty()) { + assert(!LPad->getType()->isTokenTy() && + "Split cannot be applied if LPad is token type. Otherwise an " + "invalid PHINode of token type would be created."); + PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); + PN->addIncoming(Clone1, NewBB1); + PN->addIncoming(Clone2, NewBB2); + LPad->replaceAllUsesWith(PN); + } LPad->eraseFromParent(); } else { // There is no second clone. Just replace the landing pad with the first diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 854a3b8..6454afb 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -266,27 +266,14 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - CloningDirector *Director; - ValueMapTypeRemapper *TypeMapper; - ValueMaterializer *Materializer; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, - const char *nameSuffix, ClonedCodeInfo *codeInfo, - CloningDirector *Director) + const char *nameSuffix, ClonedCodeInfo *codeInfo) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), - CodeInfo(codeInfo), Director(Director) { - // These are optional components. The Director may return null. - if (Director) { - TypeMapper = Director->getTypeRemapper(); - Materializer = Director->getValueMaterializer(); - } else { - TypeMapper = nullptr; - Materializer = nullptr; - } - } + CodeInfo(codeInfo) {} /// The specified block is found to be reachable, clone it and /// anything that it can reach. @@ -332,23 +319,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // loop doesn't include the terminator. for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; ++II) { - // If the "Director" remaps the instruction, don't clone it. - if (Director) { - CloningDirector::CloningAction Action = - Director->handleInstruction(VMap, &*II, NewBB); - // If the cloning director says stop, we want to stop everything, not - // just break out of the loop (which would cause the terminator to be - // cloned). The cloning director is responsible for inserting a proper - // terminator into the new basic block in this case. 
- if (Action == CloningDirector::StopCloningBB) - return; - // If the cloning director says skip, continue to the next instruction. - // In this case, the cloning director is responsible for mapping the - // skipped instruction to some value that is defined in the new - // basic block. - if (Action == CloningDirector::SkipInstruction) - continue; - } Instruction *NewInst = II->clone(); @@ -356,8 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // nodes for which we defer processing until we update the CFG. if (!isa<PHINode>(NewInst)) { RemapInstruction(NewInst, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into @@ -397,26 +366,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Finally, clone over the terminator. const TerminatorInst *OldTI = BB->getTerminator(); bool TerminatorDone = false; - if (Director) { - CloningDirector::CloningAction Action - = Director->handleInstruction(VMap, OldTI, NewBB); - // If the cloning director says stop, we want to stop everything, not - // just break out of the loop (which would cause the terminator to be - // cloned). The cloning director is responsible for inserting a proper - // terminator into the new basic block in this case. - if (Action == CloningDirector::StopCloningBB) - return; - if (Action == CloningDirector::CloneSuccessors) { - // If the director says to skip with a terminate instruction, we still - // need to clone this block's successors. - const TerminatorInst *TI = NewBB->getTerminator(); - for (const BasicBlock *Succ : TI->successors()) - ToClone.push_back(Succ); - return; - } - assert(Action != CloningDirector::SkipInstruction && - "SkipInstruction is not valid for terminators."); - } if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { if (BI->isConditional()) { // If the condition was a known constant in the callee... @@ -485,19 +434,13 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo, - CloningDirector *Director) { + const char *NameSuffix, + ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; ValueMaterializer *Materializer = nullptr; - if (Director) { - TypeMapper = Director->getTypeRemapper(); - Materializer = Director->getValueMaterializer(); - } - #ifndef NDEBUG // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. 
@@ -507,7 +450,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - NameSuffix, CodeInfo, Director); + NameSuffix, CodeInfo); const BasicBlock *StartingBB; if (StartingInst) StartingBB = StartingInst->getParent(); @@ -731,8 +674,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ClonedCodeInfo *CodeInfo, Instruction *TheCall) { CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, - ModuleLevelChanges, Returns, NameSuffix, CodeInfo, - nullptr); + ModuleLevelChanges, Returns, NameSuffix, CodeInfo); } /// \brief Remaps instructions in \p Blocks using the mapping in \p VMap. diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 0e386ac..d2793e5 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -1051,9 +1052,31 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); - if (ExtendedArg) - Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, + if (ExtendedArg) { + // We're now only describing a subset of the variable. The piece we're + // describing will always be smaller than the variable size, because + // VariableSize == Size of Alloca described by DDI. Since SI stores + // to the alloca described by DDI, if it's first operand is an extend, + // we're guaranteed that before extension, the value was narrower than + // the size of the alloca, hence the size of the described variable. + SmallVector<uint64_t, 3> NewDIExpr; + unsigned PieceOffset = 0; + // If this already is a bit piece, we drop the bit piece from the expression + // and record the offset. + if (DIExpr->isBitPiece()) { + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()-3); + PieceOffset = DIExpr->getBitPieceOffset(); + } else { + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); + } + NewDIExpr.push_back(dwarf::DW_OP_bit_piece); + NewDIExpr.push_back(PieceOffset); //Offset + const DataLayout &DL = DDI->getModule()->getDataLayout(); + NewDIExpr.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size + Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, + Builder.createExpression(NewDIExpr), DDI->getDebugLoc(), SI); + } else Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, DDI->getDebugLoc(), SI); @@ -1407,7 +1430,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB) { /// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. 
-bool llvm::removeUnreachableBlocks(Function &F) { +bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { SmallPtrSet<BasicBlock*, 128> Reachable; bool Changed = markAliveBlocks(F, Reachable); @@ -1428,6 +1451,8 @@ bool llvm::removeUnreachableBlocks(Function &F) { ++SI) if (Reachable.count(*SI)) (*SI)->removePredecessor(&*BB); + if (LVI) + LVI->eraseBlock(&*BB); BB->dropAllReferences(); } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 2499b88..eea9237 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -528,7 +528,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, Loop *OuterL = L->getParentLoop(); // Update LoopInfo if the loop is completely removed. if (CompletelyUnroll) - LI->updateUnloop(L);; + LI->markAsRemoved(L); // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify @@ -542,7 +542,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after - // LoopInfo's been updated by updateUnloop. + // LoopInfo's been updated by markAsRemoved. Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp index e038805..fa958e9 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -599,7 +599,7 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, IRBuilder<>::FastMathFlagGuard FMFG(Builder); FastMathFlags FMF; FMF.setUnsafeAlgebra(); - Builder.SetFastMathFlags(FMF); + Builder.setFastMathFlags(FMF); Value *Cmp; if (RK == MRK_FloatMin || RK == MRK_FloatMax) diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3bb3fa5..3125a2c 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -141,6 +141,8 @@ class SimplifyCFGOpt { bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool SimplifySingleResume(ResumeInst *RI); + bool SimplifyCommonResume(ResumeInst *RI); bool SimplifyCleanupReturn(CleanupReturnInst *RI); bool SimplifyUnreachable(UnreachableInst *UI); bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); @@ -3239,14 +3241,101 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, } bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { - // If this is a trivial landing pad that just continues unwinding the caught - // exception then zap the landing pad, turning its invokes into calls. + if (isa<PHINode>(RI->getValue())) + return SimplifyCommonResume(RI); + else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) && + RI->getValue() == RI->getParent()->getFirstNonPHI()) + // The resume must unwind the exception that caused control to branch here. + return SimplifySingleResume(RI); + + return false; +} + +// Simplify resume that is shared by several landing pads (phi of landing pad). 
+bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { + BasicBlock *BB = RI->getParent(); + + // Check that there are no other instructions except for debug intrinsics + // between the phi of landing pads (RI->getValue()) and resume instruction. + BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(), + E = RI->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + SmallSet<BasicBlock *, 4> TrivialUnwindBlocks; + auto *PhiLPInst = cast<PHINode>(RI->getValue()); + + // Check incoming blocks to see if any of them are trivial. + for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); + Idx != End; Idx++) { + auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx); + auto *IncomingValue = PhiLPInst->getIncomingValue(Idx); + + // If the block has other successors, we can not delete it because + // it has other dependents. + if (IncomingBB->getUniqueSuccessor() != BB) + continue; + + auto *LandingPad = + dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI()); + // Not the landing pad that caused the control to branch here. + if (IncomingValue != LandingPad) + continue; + + bool isTrivial = true; + + I = IncomingBB->getFirstNonPHI()->getIterator(); + E = IncomingBB->getTerminator()->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) { + isTrivial = false; + break; + } + + if (isTrivial) + TrivialUnwindBlocks.insert(IncomingBB); + } + + // If no trivial unwind blocks, don't do any simplifications. + if (TrivialUnwindBlocks.empty()) return false; + + // Turn all invokes that unwind here into calls. + for (auto *TrivialBB : TrivialUnwindBlocks) { + // Blocks that will be simplified should be removed from the phi node. + // Note there could be multiple edges to the resume block, and we need + // to remove them all. + while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1) + BB->removePredecessor(TrivialBB, true); + + for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB); + PI != PE;) { + BasicBlock *Pred = *PI++; + removeUnwindEdge(Pred); + } + + // In each SimplifyCFG run, only the current processed block can be erased. + // Otherwise, it will break the iteration of SimplifyCFG pass. So instead + // of erasing TrivialBB, we only remove the branch to the common resume + // block so that we can later erase the resume block since it has no + // predecessors. + TrivialBB->getTerminator()->eraseFromParent(); + new UnreachableInst(RI->getContext(), TrivialBB); + } + + // Delete the resume block if all its predecessors have been removed. + if (pred_empty(BB)) + BB->eraseFromParent(); + + return !TrivialUnwindBlocks.empty(); +} + +// Simplify resume that is only used by a single (non-phi) landing pad. +bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); - if (RI->getValue() != LPInst) - // Not a landing pad, or the resume is not unwinding the exception that - // caused control to branch here. - return false; + assert (RI->getValue() == LPInst && + "Resume must unwind the exception that caused control to here"); // Check that there are no other instructions except for debug intrinsics. 
BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator(); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index dc5fee5..dc07440 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -997,7 +997,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(CI->getFastMathFlags()); + B.setFastMathFlags(CI->getFastMathFlags()); // floor((double)floatval) -> (double)floorf(floatval) if (Callee->isIntrinsic()) { @@ -1035,7 +1035,7 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(CI->getFastMathFlags()); + B.setFastMathFlags(CI->getFastMathFlags()); // fmin((double)floatval1, (double)floatval2) // -> (double)fminf(floatval1, floatval2) @@ -1127,29 +1127,26 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Callee->getAttributes()); } + // FIXME: Use instruction-level FMF. bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent()); - // pow(exp(x), y) -> exp(x*y) + // pow(exp(x), y) -> exp(x * y) // pow(exp2(x), y) -> exp2(x * y) - // We enable these only under fast-math. Besides rounding - // differences the transformation changes overflow and - // underflow behavior quite dramatically. + // We enable these only with fast-math. Besides rounding differences, the + // transformation changes overflow and underflow behavior quite dramatically. // Example: x = 1000, y = 0.001. // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). - if (UnsafeFPMath) { - if (auto *OpC = dyn_cast<CallInst>(Op1)) { + auto *OpC = dyn_cast<CallInst>(Op1); + if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) { + LibFunc::Func Func; + Function *OpCCallee = OpC->getCalledFunction(); + if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && + TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) { IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF; - FMF.setUnsafeAlgebra(); - B.SetFastMathFlags(FMF); - - LibFunc::Func Func; - Function *OpCCallee = OpC->getCalledFunction(); - if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && - TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) - return EmitUnaryFloatFnCall( - B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"), - OpCCallee->getName(), B, OpCCallee->getAttributes()); + B.setFastMathFlags(CI->getFastMathFlags()); + Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); + return EmitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, + OpCCallee->getAttributes()); } } @@ -1167,9 +1164,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { LibFunc::fabsl)) { // In -ffast-math, pow(x, 0.5) -> sqrt(x). - if (UnsafeFPMath) + if (CI->hasUnsafeAlgebra()) { + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, Callee->getAttributes()); + } // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). 
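Aside (illustration, not part of the patch): the overflow caveat spelled out in the pow() hunk above — pow(exp(x), y) and exp(x * y) can differ dramatically without fast-math — is easy to reproduce with the example cited in the comment (x = 1000, y = 0.001):

// Standalone demonstration; standard C++ only.
#include <cmath>
#include <cstdio>

int main() {
  double x = 1000.0, y = 0.001;
  double a = std::pow(std::exp(x), y); // exp(1000) overflows to +inf, so this is +inf
  double b = std::exp(x * y);          // exp(1) is about 2.71828
  std::printf("pow(exp(x), y) = %g\n", a);
  std::printf("exp(x * y)     = %g\n", b);
  return 0;
}

This is why the rewritten code requires unsafe-algebra on both the pow call and the inner exp/exp2 call before forming the multiply.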
// This is faster than calling pow, and still handles negative zero @@ -1328,7 +1328,7 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { FMF.setNoSignedZeros(); FMF.setNoNaNs(); } - B.SetFastMathFlags(FMF); + B.setFastMathFlags(FMF); // We have a relaxed floating-point environment. We can ignore NaN-handling // and transform to a compare and select. We do not have to consider errno or @@ -1354,11 +1354,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { !FT->getParamType(0)->isFloatingPointTy()) return Ret; - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + if (!CI->hasUnsafeAlgebra()) return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); - if (!OpC) + + // The earlier call must also be unsafe in order to do these transforms. + if (!OpC || !OpC->hasUnsafeAlgebra()) return Ret; // log(pow(x,y)) -> y*log(x) @@ -1369,7 +1371,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; FMF.setUnsafeAlgebra(); - B.SetFastMathFlags(FMF); + B.setFastMathFlags(FMF); LibFunc::Func Func; Function *F = OpC->getCalledFunction(); @@ -1397,66 +1399,67 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + + if (!CI->hasUnsafeAlgebra()) return Ret; - Value *Op = CI->getArgOperand(0); - if (Instruction *I = dyn_cast<Instruction>(Op)) { - if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) { - // We're looking for a repeated factor in a multiplication tree, - // so we can do this fold: sqrt(x * x) -> fabs(x); - // or this fold: sqrt(x * x * y) -> fabs(x) * sqrt(y). - Value *Op0 = I->getOperand(0); - Value *Op1 = I->getOperand(1); - Value *RepeatOp = nullptr; - Value *OtherOp = nullptr; - if (Op0 == Op1) { - // Simple match: the operands of the multiply are identical. - RepeatOp = Op0; - } else { - // Look for a more complicated pattern: one of the operands is itself - // a multiply, so search for a common factor in that multiply. - // Note: We don't bother looking any deeper than this first level or for - // variations of this pattern because instcombine's visitFMUL and/or the - // reassociation pass should give us this form. - Value *OtherMul0, *OtherMul1; - if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { - // Pattern: sqrt((x * y) * z) - if (OtherMul0 == OtherMul1) { - // Matched: sqrt((x * x) * z) - RepeatOp = OtherMul0; - OtherOp = Op1; - } - } - } - if (RepeatOp) { - // Fast math flags for any created instructions should match the sqrt - // and multiply. - // FIXME: We're not checking the sqrt because it doesn't have - // fast-math-flags (see earlier comment). - IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(I->getFastMathFlags()); - // If we found a repeated factor, hoist it out of the square root and - // replace it with the fabs of that factor. - Module *M = Callee->getParent(); - Type *ArgType = Op->getType(); - Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); - Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); - if (OtherOp) { - // If we found a non-repeated factor, we still need to get its square - // root. We then multiply that by the value that was simplified out - // of the square root calculation. 
- Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); - Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); - return B.CreateFMul(FabsCall, SqrtCall); - } - return FabsCall; + Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); + if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + return Ret; + + // We're looking for a repeated factor in a multiplication tree, + // so we can do this fold: sqrt(x * x) -> fabs(x); + // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y). + Value *Op0 = I->getOperand(0); + Value *Op1 = I->getOperand(1); + Value *RepeatOp = nullptr; + Value *OtherOp = nullptr; + if (Op0 == Op1) { + // Simple match: the operands of the multiply are identical. + RepeatOp = Op0; + } else { + // Look for a more complicated pattern: one of the operands is itself + // a multiply, so search for a common factor in that multiply. + // Note: We don't bother looking any deeper than this first level or for + // variations of this pattern because instcombine's visitFMUL and/or the + // reassociation pass should give us this form. + Value *OtherMul0, *OtherMul1; + if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { + // Pattern: sqrt((x * y) * z) + if (OtherMul0 == OtherMul1 && + cast<Instruction>(Op0)->hasUnsafeAlgebra()) { + // Matched: sqrt((x * x) * z) + RepeatOp = OtherMul0; + OtherOp = Op1; } } } - return Ret; -} + if (!RepeatOp) + return Ret; + // Fast math flags for any created instructions should match the sqrt + // and multiply. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(I->getFastMathFlags()); + + // If we found a repeated factor, hoist it out of the square root and + // replace it with the fabs of that factor. + Module *M = Callee->getParent(); + Type *ArgType = I->getType(); + Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); + Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); + if (OtherOp) { + // If we found a non-repeated factor, we still need to get its square + // root. We then multiply that by the value that was simplified out + // of the square root calculation. + Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); + Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); + return B.CreateFMul(FabsCall, SqrtCall); + } + return FabsCall; +} + +// TODO: Generalize to handle any trig function and its inverse. Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1471,13 +1474,15 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { !FT->getParamType(0)->isFloatingPointTy()) return Ret; - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) - return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); if (!OpC) return Ret; + // Both calls must allow unsafe optimizations in order to remove them. 
+ if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra()) + return Ret; + // tan(atan(x)) -> x // tanf(atanf(x)) -> x // tanl(atanl(x)) -> x diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 2e361d3..f47ddb9 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -222,8 +222,17 @@ static void resolveCycles(Metadata *MD, bool AllowTemps) { if (auto *N = dyn_cast_or_null<MDNode>(MD)) { if (AllowTemps && N->isTemporary()) return; - if (!N->isResolved()) - N->resolveCycles(AllowTemps); + if (!N->isResolved()) { + if (AllowTemps) + // Note that this will drop RAUW support on any temporaries, which + // blocks uniquing. If this ends up being an issue, in the future + // we can experiment with delaying resolving these nodes until + // after metadata is fully materialized (i.e. when linking metadata + // as a postpass after function importing). + N->resolveNonTemporaries(); + else + N->resolveCycles(); + } } } diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9ed44d1..27d3337 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -222,7 +222,7 @@ static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) { } } } - + /// \returns \p I after propagating metadata from \p VL. static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) { Instruction *I0 = cast<Instruction>(VL[0]); @@ -506,7 +506,7 @@ private: } return Last; } - + /// -- Vectorization State -- /// Holds all of the tree entries. std::vector<TreeEntry> VectorizableTree; @@ -884,7 +884,7 @@ private: /// The current size of the scheduling region. int ScheduleRegionSize; - + /// The maximum size allowed for the scheduling region. int ScheduleRegionSizeLimit; @@ -1089,7 +1089,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { newTreeEntry(VL, false); return; } - + // Check that every instructions appears once in this bundle. for (unsigned i = 0, e = VL.size(); i < e; ++i) for (unsigned j = i+1; j < e; ++j) @@ -1711,7 +1711,7 @@ int BoUpSLP::getSpillCost() { int Cost = 0; SmallPtrSet<Instruction*, 4> LiveValues; - Instruction *PrevInst = nullptr; + Instruction *PrevInst = nullptr; for (unsigned N = 0; N < VectorizableTree.size(); ++N) { Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]); @@ -1736,7 +1736,7 @@ int BoUpSLP::getSpillCost() { for (auto &J : PrevInst->operands()) { if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J)) LiveValues.insert(cast<Instruction>(&*J)); - } + } // Now find the sequence of instructions between PrevInst and Inst. 
BasicBlock::reverse_iterator InstIt(Inst->getIterator()), @@ -1780,30 +1780,29 @@ int BoUpSLP::getTreeCost() { unsigned BundleWidth = VectorizableTree[0].Scalars.size(); - for (unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) { - int C = getEntryCost(&VectorizableTree[i]); + for (TreeEntry &TE : VectorizableTree) { + int C = getEntryCost(&TE); DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with " - << *VectorizableTree[i].Scalars[0] << " .\n"); + << TE.Scalars[0] << " .\n"); Cost += C; } SmallSet<Value *, 16> ExtractCostCalculated; int ExtractCost = 0; - for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end(); - I != E; ++I) { + for (ExternalUser &EU : ExternalUses) { // We only add extract cost once for the same scalar. - if (!ExtractCostCalculated.insert(I->Scalar).second) + if (!ExtractCostCalculated.insert(EU.Scalar).second) continue; // Uses by ephemeral values are free (because the ephemeral value will be // removed prior to code generation, and so the extraction will be // removed as well). - if (EphValues.count(I->User)) + if (EphValues.count(EU.User)) continue; - VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth); + VectorType *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth); ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, - I->Lane); + EU.Lane); } Cost += getSpillCost(); @@ -2551,7 +2550,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *BoUpSLP::vectorizeTree() { - + // All blocks must be scheduled before any instructions are inserted. for (auto &BSIter : BlocksSchedules) { scheduleBlock(BSIter.second.get()); @@ -3072,10 +3071,10 @@ void BoUpSLP::BlockScheduling::resetSchedule() { } void BoUpSLP::scheduleBlock(BlockScheduling *BS) { - + if (!BS->ScheduleStart) return; - + DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n"); BS->resetSchedule(); @@ -3590,7 +3589,7 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { /// \param NumEltsToRdx The number of elements that should be reduced in the /// vector. /// \param IsPairwise Whether the reduction is a pairwise or splitting -/// reduction. A pairwise reduction will generate a mask of +/// reduction. A pairwise reduction will generate a mask of /// <0,2,...> or <1,3,..> while a splitting reduction will generate /// <2,3, undef,undef> for a vector of 4 and NumElts = 2. /// \param IsLeft True will generate a mask of even elements, odd otherwise. @@ -3773,7 +3772,7 @@ public: IRBuilder<> Builder(ReductionRoot); FastMathFlags Unsafe; Unsafe.setUnsafeAlgebra(); - Builder.SetFastMathFlags(Unsafe); + Builder.setFastMathFlags(Unsafe); unsigned i = 0; for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { @@ -4018,9 +4017,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // Collect the incoming values from the PHIs. Incoming.clear(); - for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie; - ++instr) { - PHINode *P = dyn_cast<PHINode>(instr); + for (Instruction &I : *BB) { + PHINode *P = dyn_cast<PHINode>(&I); if (!P) break; |
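The libcall simplifications in this patch rest on real-number identities — log(pow(x,y)) = y*log(x), sqrt(x*x) = |x|, sqrt((x*x)*y) = |x|*sqrt(y), tan(atan(x)) = x — that are not exact in IEEE floating point, which is why the folds now check hasUnsafeAlgebra() on the individual calls (and, for log and tan, on the inner call as well) instead of querying a function-wide unsafe-fp-math attribute. A minimal standalone C++ sketch of the identities themselves, using plain libm rather than any LLVM API (illustrative only, not part of the patch; the sample inputs are arbitrary):

#include <cmath>
#include <cstdio>

int main() {
  double x = 1.7, y = 3.2;

  // log(pow(x, y)) == y * log(x)  (mathematically requires x > 0; bitwise
  // equality is not guaranteed in IEEE arithmetic, hence unsafe-algebra)
  std::printf("%.17g  %.17g\n", std::log(std::pow(x, y)), y * std::log(x));

  // sqrt(x * x) == fabs(x), and sqrt((x * x) * y) == fabs(x) * sqrt(y)
  // for y >= 0; overflow of x * x can break the identity in finite precision.
  std::printf("%.17g  %.17g\n", std::sqrt(x * x), std::fabs(x));
  std::printf("%.17g  %.17g\n", std::sqrt((x * x) * y),
              std::fabs(x) * std::sqrt(y));

  // tan(atan(x)) == x, up to rounding.
  std::printf("%.17g  %.17g\n", std::tan(std::atan(x)), x);
  return 0;
}

In the pass itself the replacement instructions are emitted through IRBuilder with the original instruction's fast-math flags reapplied under an IRBuilder<>::FastMathFlagGuard, as the optimizeLog and optimizeSqrt hunks above do.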