Diffstat (limited to 'lib/Transforms/Scalar')
 lib/Transforms/Scalar/LICM.cpp                    | 115
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp      | 182
 lib/Transforms/Scalar/MemCpyOptimizer.cpp         |  98
 lib/Transforms/Scalar/Reassociate.cpp             |  34
 lib/Transforms/Scalar/RewriteStatepointsForGC.cpp |  52
 5 files changed, 344 insertions, 137 deletions
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 6d70cdc..e01e23f 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -75,10 +75,12 @@ DisablePromotion("disable-licm-promotion", cl::Hidden,
                  cl::desc("Disable memory promotion in LICM pass"));

 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
-static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop);
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
+                            const LICMSafetyInfo *SafetyInfo);
 static bool hoist(Instruction &I, BasicBlock *Preheader);
 static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
-                 const Loop *CurLoop, AliasSetTracker *CurAST );
+                 const Loop *CurLoop, AliasSetTracker *CurAST,
+                 const LICMSafetyInfo *SafetyInfo);
 static bool isGuaranteedToExecute(const Instruction &Inst,
                                   const DominatorTree *DT,
                                   const Loop *CurLoop,
@@ -92,10 +94,10 @@ static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
                                      const AAMDNodes &AAInfo,
                                      AliasSetTracker *CurAST);
-static Instruction *CloneInstructionInExitBlock(const Instruction &I,
-                                                BasicBlock &ExitBlock,
-                                                PHINode &PN,
-                                                const LoopInfo *LI);
+static Instruction *
+CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
+                            const LoopInfo *LI,
+                            const LICMSafetyInfo *SafetyInfo);
 static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
                                DominatorTree *DT, TargetLibraryInfo *TLI,
                                Loop *CurLoop, AliasSetTracker *CurAST,
@@ -348,10 +350,10 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
       // outside of the loop. In this case, it doesn't even matter if the
       // operands of the instruction are loop invariant.
       //
-      if (isNotUsedInLoop(I, CurLoop) &&
+      if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
           canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) {
         ++II;
-        Changed |= sink(I, LI, DT, CurLoop, CurAST);
+        Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo);
       }
     }
   return Changed;
@@ -432,6 +434,14 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
     for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
          (I != E) && !SafetyInfo->MayThrow; ++I)
       SafetyInfo->MayThrow |= I->mayThrow();
+
+  // Compute funclet colors if we might sink/hoist in a function with a funclet
+  // personality routine.
+  Function *Fn = CurLoop->getHeader()->getParent();
+  if (Fn->hasPersonalityFn())
+    if (Constant *PersonalityFn = Fn->getPersonalityFn())
+      if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)))
+        SafetyInfo->BlockColors = colorEHFunclets(*Fn);
 }

 /// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
@@ -466,6 +476,10 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
     if (isa<DbgInfoIntrinsic>(I))
       return false;

+    // Don't sink calls which can throw.
+    if (CI->mayThrow())
+      return false;
+
     // Handle simple cases by querying alias analysis.
     FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
     if (Behavior == FMRB_DoesNotAccessMemory)
@@ -534,10 +548,24 @@ static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
 /// the loop. If this is true, we can sink the instruction to the exit
 /// blocks of the loop.
 ///
-static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) {
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
+                            const LICMSafetyInfo *SafetyInfo) {
+  const auto &BlockColors = SafetyInfo->BlockColors;
   for (const User *U : I.users()) {
     const Instruction *UI = cast<Instruction>(U);
     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
+      const BasicBlock *BB = PN->getParent();
+      // We cannot sink uses in catchswitches.
+      if (isa<CatchSwitchInst>(BB->getTerminator()))
+        return false;
+
+      // We need to sink a callsite to a unique funclet. Avoid sinking if the
+      // phi use is too muddled.
+      if (isa<CallInst>(I))
+        if (!BlockColors.empty() &&
+            BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
+          return false;
+
       // A PHI node where all of the incoming values are this instruction are
       // special -- they can just be RAUW'ed with the instruction and thus
       // don't require a use in the predecessor. This is a particular important
@@ -565,11 +593,41 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) {
   return true;
 }

-static Instruction *CloneInstructionInExitBlock(const Instruction &I,
-                                                BasicBlock &ExitBlock,
-                                                PHINode &PN,
-                                                const LoopInfo *LI) {
-  Instruction *New = I.clone();
+static Instruction *
+CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
+                            const LoopInfo *LI,
+                            const LICMSafetyInfo *SafetyInfo) {
+  Instruction *New;
+  if (auto *CI = dyn_cast<CallInst>(&I)) {
+    const auto &BlockColors = SafetyInfo->BlockColors;
+
+    // Sinking call-sites need to be handled differently from other
+    // instructions. The cloned call-site needs a funclet bundle operand
+    // appropriate for it's location in the CFG.
+    SmallVector<OperandBundleDef, 1> OpBundles;
+    for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
+         BundleIdx != BundleEnd; ++BundleIdx) {
+      OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
+      if (Bundle.getTagID() == LLVMContext::OB_funclet)
+        continue;
+
+      OpBundles.emplace_back(Bundle);
+    }
+
+    if (!BlockColors.empty()) {
+      const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
+      assert(CV.size() == 1 && "non-unique color for exit block!");
+      BasicBlock *BBColor = CV.front();
+      Instruction *EHPad = BBColor->getFirstNonPHI();
+      if (EHPad->isEHPad())
+        OpBundles.emplace_back("funclet", EHPad);
+    }
+
+    New = CallInst::Create(CI, OpBundles);
+  } else {
+    New = I.clone();
+  }
+
   ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
   if (!I.getName().empty())
     New->setName(I.getName() + ".le");
@@ -601,7 +659,8 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I,
 /// position, and may either delete it or move it to outside of the loop.
 ///
 static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
-                 const Loop *CurLoop, AliasSetTracker *CurAST ) {
+                 const Loop *CurLoop, AliasSetTracker *CurAST,
+                 const LICMSafetyInfo *SafetyInfo) {
   DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
   bool Changed = false;
   if (isa<LoadInst>(I)) ++NumMovedLoads;
@@ -652,7 +711,7 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
       New = It->second;
     else
       New = SunkCopies[ExitBlock] =
-          CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI);
+          CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo);

     PN->replaceAllUsesWith(New);
     PN->eraseFromParent();
@@ -950,6 +1009,21 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
   if (!GuaranteedToExecute)
     return Changed;

+  // Figure out the loop exits and their insertion points, if this is the
+  // first promotion.
+  if (ExitBlocks.empty()) {
+    CurLoop->getUniqueExitBlocks(ExitBlocks);
+    InsertPts.clear();
+    InsertPts.reserve(ExitBlocks.size());
+    for (BasicBlock *ExitBlock : ExitBlocks)
+      InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+  }
+
+  // Can't insert into a catchswitch.
+  for (BasicBlock *ExitBlock : ExitBlocks)
+    if (isa<CatchSwitchInst>(ExitBlock->getTerminator()))
+      return Changed;
+
   // Otherwise, this is safe to promote, lets do it!
   DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
   Changed = true;
@@ -961,15 +1035,6 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
   // location is better than none.
   DebugLoc DL = LoopUses[0]->getDebugLoc();

-  // Figure out the loop exits and their insertion points, if this is the
-  // first promotion.
-  if (ExitBlocks.empty()) {
-    CurLoop->getUniqueExitBlocks(ExitBlocks);
-    InsertPts.resize(ExitBlocks.size());
-    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
-      InsertPts[i] = &*ExitBlocks[i]->getFirstInsertionPt();
-  }
-
   // We use the SSAUpdater interface to insert phi nodes as required.
   SmallVector<PHINode*, 16> NewPHIs;
   SSAUpdater SSA(&NewPHIs);
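The LICM change above lets the pass sink a call out of a loop even when the enclosing function uses funclet-based exception handling: SafetyInfo now carries the funclet block colors, and the clone created in the exit block receives a matching "funclet" operand bundle. A minimal sketch, assuming a call that is readnone and does not throw (the helper name below is invented for illustration and is not part of the patch):

    // Illustrative only; computeCheap is a hypothetical helper.
    extern int computeCheap(int) __attribute__((const)); // assumed readnone/nounwind

    int sinkLastValue(int N) {
      int Last = 0;
      for (int I = 0; I < N; ++I)
        Last = computeCheap(I); // the value only reaches the loop exit (via an
                                // LCSSA phi), so LICM may sink the call there
      return Last;
    }

If the exit block sits inside a catchpad or cleanuppad funclet, the cloned call now carries the funclet bundle for that pad; sinking is skipped when the phi lives in a catchswitch block or in a block whose funclet color is not unique.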
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 2d577de..4521640 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -108,7 +108,11 @@ public:

 private:
   typedef SmallVector<StoreInst *, 8> StoreList;
-  StoreList StoreRefs;
+  StoreList StoreRefsForMemset;
+  StoreList StoreRefsForMemcpy;
+  bool HasMemset;
+  bool HasMemsetPattern;
+  bool HasMemcpy;

   /// \name Countable Loop Idiom Handling
   /// @{
@@ -118,17 +122,15 @@ private:
                       SmallVectorImpl<BasicBlock *> &ExitBlocks);

   void collectStores(BasicBlock *BB);
-  bool isLegalStore(StoreInst *SI);
+  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
   bool processLoopStore(StoreInst *SI, const SCEV *BECount);
   bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

   bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
-                               unsigned StoreAlignment, Value *SplatValue,
+                               unsigned StoreAlignment, Value *StoredVal,
                                Instruction *TheStore, const SCEVAddRecExpr *Ev,
                                const SCEV *BECount, bool NegStride);
-  bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
-                                  const SCEVAddRecExpr *StoreEv,
-                                  const SCEV *BECount, bool NegStride);
+  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);

   /// @}
   /// \name Noncountable Loop Idiom Handling
@@ -207,8 +209,13 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
                                *CurLoop->getHeader()->getParent());
   DL = &CurLoop->getHeader()->getModule()->getDataLayout();

-  if (SE->hasLoopInvariantBackedgeTakenCount(L))
-    return runOnCountableLoop();
+  HasMemset = TLI->has(LibFunc::memset);
+  HasMemsetPattern = TLI->has(LibFunc::memset_pattern16);
+  HasMemcpy = TLI->has(LibFunc::memcpy);
+
+  if (HasMemset || HasMemsetPattern || HasMemcpy)
+    if (SE->hasLoopInvariantBackedgeTakenCount(L))
+      return runOnCountableLoop();

   return runOnNoncountableLoop();
 }
@@ -297,7 +304,8 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
   return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
 }

-bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
+bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
+                                      bool &ForMemcpy) {
   // Don't touch volatile stores.
   if (!SI->isSimple())
     return false;
@@ -322,22 +330,86 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
   if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
     return false;

-  return true;
+  // See if the store can be turned into a memset.
+
+  // If the stored value is a byte-wise value (like i32 -1), then it may be
+  // turned into a memset of i8 -1, assuming that all the consecutive bytes
+  // are stored. A store of i32 0x01020304 can never be turned into a memset,
+  // but it can be turned into memset_pattern if the target supports it.
+  Value *SplatValue = isBytewiseValue(StoredVal);
+  Constant *PatternValue = nullptr;
+
+  // If we're allowed to form a memset, and the stored value would be
+  // acceptable for memset, use it.
+  if (HasMemset && SplatValue &&
+      // Verify that the stored value is loop invariant. If not, we can't
+      // promote the memset.
+      CurLoop->isLoopInvariant(SplatValue)) {
+    // It looks like we can use SplatValue.
+    ForMemset = true;
+    return true;
+  } else if (HasMemsetPattern &&
+             // Don't create memset_pattern16s with address spaces.
+             StorePtr->getType()->getPointerAddressSpace() == 0 &&
+             (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
+    // It looks like we can use PatternValue!
+    ForMemset = true;
+    return true;
+  }
+
+  // Otherwise, see if the store can be turned into a memcpy.
+  if (HasMemcpy) {
+    // Check to see if the stride matches the size of the store. If so, then we
+    // know that every byte is touched in the loop.
+    unsigned Stride = getStoreStride(StoreEv);
+    unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+    if (StoreSize != Stride && StoreSize != -Stride)
+      return false;
+
+    // The store must be feeding a non-volatile load.
+    LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+    if (!LI || !LI->isSimple())
+      return false;
+
+    // See if the pointer expression is an AddRec like {base,+,1} on the current
+    // loop, which indicates a strided load. If we have something else, it's a
+    // random load we can't handle.
+    const SCEVAddRecExpr *LoadEv =
+        dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
+    if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+      return false;
+
+    // The store and load must share the same stride.
+    if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
+      return false;
+
+    // Success. This store can be converted into a memcpy.
+    ForMemcpy = true;
+    return true;
+  }
+  // This store can't be transformed into a memset/memcpy.
+  return false;
 }

 void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
-  StoreRefs.clear();
+  StoreRefsForMemset.clear();
+  StoreRefsForMemcpy.clear();
   for (Instruction &I : *BB) {
     StoreInst *SI = dyn_cast<StoreInst>(&I);
     if (!SI)
       continue;

+    bool ForMemset = false;
+    bool ForMemcpy = false;
     // Make sure this is a strided store with a constant stride.
-    if (!isLegalStore(SI))
+    if (!isLegalStore(SI, ForMemset, ForMemcpy))
       continue;

     // Save the store locations.
-    StoreRefs.push_back(SI);
+    if (ForMemset)
+      StoreRefsForMemset.push_back(SI);
+    else if (ForMemcpy)
+      StoreRefsForMemcpy.push_back(SI);
   }
 }
@@ -357,9 +429,15 @@ bool LoopIdiomRecognize::runOnLoopBlock(
   bool MadeChange = false;
   // Look for store instructions, which may be optimized to memset/memcpy.
   collectStores(BB);
-  for (auto &SI : StoreRefs)
+
+  // Look for a single store which can be optimized into a memset.
+  for (auto &SI : StoreRefsForMemset)
     MadeChange |= processLoopStore(SI, BECount);

+  // Optimize the store into a memcpy, if it feeds an similarly strided load.
+  for (auto &SI : StoreRefsForMemcpy)
+    MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
+
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
     Instruction *Inst = &*I++;
     // Look for memset instructions, which may be optimized to a larger memset.
@@ -380,7 +458,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(
   return MadeChange;
 }

-/// processLoopStore - See if this store can be promoted to a memset or memcpy.
+/// processLoopStore - See if this store can be promoted to a memset.
 bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
   assert(SI->isSimple() && "Expected only non-volatile stores.");
@@ -398,12 +476,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
   bool NegStride = StoreSize == -Stride;

   // See if we can optimize just this store in isolation.
-  if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
-                              StoredVal, SI, StoreEv, BECount, NegStride))
-    return true;
-
-  // Optimize the store into a memcpy, if it feeds an similarly strided load.
-  return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride);
+  return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+                                 StoredVal, SI, StoreEv, BECount, NegStride);
 }

 /// processLoopMemSet - See if this memset can be promoted to a large memset.
@@ -440,8 +514,14 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
   if (!Stride || MSI->getLength() != Stride->getValue())
     return false;

+  // Verify that the memset value is loop invariant. If not, we can't promote
+  // the memset.
+  Value *SplatValue = MSI->getValue();
+  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
+    return false;
+
   return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
-                                 MSI->getAlignment(), MSI->getValue(), MSI, Ev,
+                                 MSI->getAlignment(), SplatValue, MSI, Ev,
                                  BECount, /*NegStride=*/false);
 }
@@ -496,37 +576,19 @@ bool LoopIdiomRecognize::processLoopStridedStore(
     Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
     Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
     const SCEV *BECount, bool NegStride) {
-
-  // If the stored value is a byte-wise value (like i32 -1), then it may be
-  // turned into a memset of i8 -1, assuming that all the consecutive bytes
-  // are stored. A store of i32 0x01020304 can never be turned into a memset,
-  // but it can be turned into memset_pattern if the target supports it.
   Value *SplatValue = isBytewiseValue(StoredVal);
   Constant *PatternValue = nullptr;
-  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();

-  // If we're allowed to form a memset, and the stored value would be acceptable
-  // for memset, use it.
-  if (SplatValue && TLI->has(LibFunc::memset) &&
-      // Verify that the stored value is loop invariant. If not, we can't
-      // promote the memset.
-      CurLoop->isLoopInvariant(SplatValue)) {
-    // Keep and use SplatValue.
-    PatternValue = nullptr;
-  } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) &&
-             (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
-    // Don't create memset_pattern16s with address spaces.
-    // It looks like we can use PatternValue!
-    SplatValue = nullptr;
-  } else {
-    // Otherwise, this isn't an idiom we can transform. For example, we can't
-    // do anything with a 3-byte store.
-    return false;
-  }
+  if (!SplatValue)
+    PatternValue = getMemSetPatternValue(StoredVal, DL);
+
+  assert((SplatValue || PatternValue) &&
+         "Expected either splat value or pattern value.");

   // The trip count of the loop and the base pointer of the addrec SCEV is
   // guaranteed to be loop invariant, which means that it should dominate the
   // header. This allows us to insert code for it in the preheader.
+  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
   BasicBlock *Preheader = CurLoop->getLoopPreheader();
   IRBuilder<> Builder(Preheader->getTerminator());
   SCEVExpander Expander(*SE, *DL, "loop-idiom");
@@ -608,29 +670,25 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 /// If the stored value is a strided load in the same loop with the same stride
 /// this may be transformable into a memcpy. This kicks in for stuff like
 ///   for (i) A[i] = B[i];
-bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
-    StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
-    const SCEV *BECount, bool NegStride) {
-  // If we're not allowed to form memcpy, we fail.
-  if (!TLI->has(LibFunc::memcpy))
-    return false;
+bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
+                                                    const SCEV *BECount) {
+  assert(SI->isSimple() && "Expected only non-volatile stores.");
+
+  Value *StorePtr = SI->getPointerOperand();
+  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+  unsigned Stride = getStoreStride(StoreEv);
+  unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+  bool NegStride = StoreSize == -Stride;

   // The store must be feeding a non-volatile load.
-  LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
-  if (!LI || !LI->isSimple())
-    return false;
+  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+  assert(LI->isSimple() && "Expected only non-volatile stores.");

   // See if the pointer expression is an AddRec like {base,+,1} on the current
   // loop, which indicates a strided load. If we have something else, it's a
   // random load we can't handle.
   const SCEVAddRecExpr *LoadEv =
-      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
-  if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
-    return false;
-
-  // The store and load must share the same stride.
-  if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
-    return false;
+      cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));

   // The trip count of the loop and the base pointer of the addrec SCEV is
   // guaranteed to be loop invariant, which means that it should dominate the
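The LoopIdiomRecognize refactoring above makes isLegalStore classify each candidate store up front and record it in StoreRefsForMemset or StoreRefsForMemcpy, instead of re-deriving the legality checks inside the processing routines. A rough sketch of the three store shapes being distinguished, based on the comments in the patch (function and variable names are invented for the example):

    void loopIdioms(int *A, const int *B, long N) {
      for (long I = 0; I < N; ++I)
        A[I] = -1;         // byte-wise splat value: memset candidate
      for (long I = 0; I < N; ++I)
        A[I] = 0x01020304; // not byte-wise: memset_pattern16 candidate, when the
                           // target library provides memset_pattern16
      for (long I = 0; I < N; ++I)
        A[I] = B[I];       // strided load feeding a same-stride store: memcpy candidate
    }

runOnLoop also checks the target library up front now: if none of memset, memset_pattern16, or memcpy is available, countable-loop processing is skipped entirely.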
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 0333bf2..7354016 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -481,6 +481,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
   return AMemSet;
 }

+static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
+                                    const LoadInst *LI) {
+  unsigned StoreAlign = SI->getAlignment();
+  if (!StoreAlign)
+    StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
+  unsigned LoadAlign = LI->getAlignment();
+  if (!LoadAlign)
+    LoadAlign = DL.getABITypeAlignment(LI->getType());
+
+  return std::min(StoreAlign, LoadAlign);
+}
 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (!SI->isSimple()) return false;

@@ -496,12 +507,84 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {

   const DataLayout &DL = SI->getModule()->getDataLayout();

-  // Detect cases where we're performing call slot forwarding, but
-  // happen to be using a load-store pair to implement it, rather than
-  // a memcpy.
+  // Load to store forwarding can be interpreted as memcpy.
   if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
     if (LI->isSimple() && LI->hasOneUse() &&
         LI->getParent() == SI->getParent()) {
+
+      auto *T = LI->getType();
+      if (T->isAggregateType()) {
+        AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+        MemoryLocation LoadLoc = MemoryLocation::get(LI);
+
+        // We use alias analysis to check if an instruction may store to
+        // the memory we load from in between the load and the store. If
+        // such an instruction is found, we try to promote there instead
+        // of at the store position.
+        Instruction *P = SI;
+        for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator();
+             I != E; ++I) {
+          if (!(AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod))
+            continue;
+
+          // We found an instruction that may write to the loaded memory.
+          // We can try to promote at this position instead of the store
+          // position if nothing alias the store memory after this.
+          P = &*I;
+          for (; I != E; ++I) {
+            MemoryLocation StoreLoc = MemoryLocation::get(SI);
+            if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
+              DEBUG(dbgs() << "Alias " << *I << "\n");
+              P = nullptr;
+              break;
+            }
+          }
+
+          break;
+        }
+
+        // If a valid insertion position is found, then we can promote
+        // the load/store pair to a memcpy.
+        if (P) {
+          // If we load from memory that may alias the memory we store to,
+          // memmove must be used to preserve semantic. If not, memcpy can
+          // be used.
+          bool UseMemMove = false;
+          if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
+            UseMemMove = true;
+
+          unsigned Align = findCommonAlignment(DL, SI, LI);
+          uint64_t Size = DL.getTypeStoreSize(T);
+
+          IRBuilder<> Builder(P);
+          Instruction *M;
+          if (UseMemMove)
+            M = Builder.CreateMemMove(SI->getPointerOperand(),
+                                      LI->getPointerOperand(), Size,
+                                      Align, SI->isVolatile());
+          else
+            M = Builder.CreateMemCpy(SI->getPointerOperand(),
+                                     LI->getPointerOperand(), Size,
+                                     Align, SI->isVolatile());
+
+          DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI
+                       << " => " << *M << "\n");
+
+          MD->removeInstruction(SI);
+          SI->eraseFromParent();
+          MD->removeInstruction(LI);
+          LI->eraseFromParent();
+          ++NumMemCpyInstr;
+
+          // Make sure we do not invalidate the iterator.
+          BBI = M->getIterator();
+          return true;
+        }
+      }
+
+      // Detect cases where we're performing call slot forwarding, but
+      // happen to be using a load-store pair to implement it, rather than
+      // a memcpy.
       MemDepResult ldep = MD->getDependency(LI);
       CallInst *C = nullptr;
       if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
@@ -522,18 +605,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
       }

       if (C) {
-        unsigned storeAlign = SI->getAlignment();
-        if (!storeAlign)
-          storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
-        unsigned loadAlign = LI->getAlignment();
-        if (!loadAlign)
-          loadAlign = DL.getABITypeAlignment(LI->getType());
-
         bool changed = performCallSlotOptzn(
             LI, SI->getPointerOperand()->stripPointerCasts(),
             LI->getPointerOperand()->stripPointerCasts(),
             DL.getTypeStoreSize(SI->getOperand(0)->getType()),
-            std::min(storeAlign, loadAlign), C);
+            findCommonAlignment(DL, SI, LI), C);
         if (changed) {
           MD->removeInstruction(SI);
           SI->eraseFromParent();
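The MemCpyOptimizer change above teaches processStore to treat a load/store pair of an aggregate type as a copy: it scans the instructions between the load and the store with alias analysis, picks a safe insertion point, and emits a memcpy, or a memmove when the source and destination may alias. A hedged sketch of a source pattern that can produce such a pair; whether a frontend actually emits an aggregate load and store here depends on the frontend and the type:

    // Illustrative only.
    struct Packet { char Bytes[64]; };

    void forward(Packet *Dst, const Packet *Src) {
      *Dst = *Src; // may be lowered to an aggregate load feeding an aggregate
                   // store, which this change turns into llvm.memcpy/llvm.memmove
    }

The new findCommonAlignment helper also replaces the inline min-of-alignments computation in the call-slot forwarding path below it.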
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index fb970c7..401a740 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -183,6 +183,8 @@ namespace {
     Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
     Value *RemoveFactorFromExpression(Value *V, Value *Factor);
     void EraseInst(Instruction *I);
+    void RecursivelyEraseDeadInsts(Instruction *I,
+                                   SetVector<AssertingVH<Instruction>> &Insts);
     void OptimizeInst(Instruction *I);
     Instruction *canonicalizeNegConstExpr(Instruction *I);
   };
@@ -1926,6 +1928,22 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
   return nullptr;
 }

+// Remove dead instructions and if any operands are trivially dead add them to
+// Insts so they will be removed as well.
+void Reassociate::RecursivelyEraseDeadInsts(
+    Instruction *I, SetVector<AssertingVH<Instruction>> &Insts) {
+  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
+  SmallVector<Value *, 4> Ops(I->op_begin(), I->op_end());
+  ValueRankMap.erase(I);
+  Insts.remove(I);
+  RedoInsts.remove(I);
+  I->eraseFromParent();
+  for (auto Op : Ops)
+    if (Instruction *OpInst = dyn_cast<Instruction>(Op))
+      if (OpInst->use_empty())
+        Insts.insert(OpInst);
+}
+
 /// Zap the given instruction, adding interesting operands to the work list.
 void Reassociate::EraseInst(Instruction *I) {
   assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
@@ -2255,7 +2273,21 @@ bool Reassociate::runOnFunction(Function &F) {
       ++II;
     }

-    // If this produced extra instructions to optimize, handle them now.
+    // Make a copy of all the instructions to be redone so we can remove dead
+    // instructions.
+    SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
+    // Iterate over all instructions to be reevaluated and remove trivially dead
+    // instructions. If any operand of the trivially dead instruction becomes
+    // dead mark it for deletion as well. Continue this process until all
+    // trivially dead instructions have been removed.
+    while (!ToRedo.empty()) {
+      Instruction *I = ToRedo.pop_back_val();
+      if (isInstructionTriviallyDead(I))
+        RecursivelyEraseDeadInsts(I, ToRedo);
+    }
+
+    // Now that we have removed dead instructions, we can reoptimize the
+    // remaining instructions.
     while (!RedoInsts.empty()) {
       Instruction *I = RedoInsts.pop_back_val();
       if (isInstructionTriviallyDead(I))
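The Reassociate change adds RecursivelyEraseDeadInsts and a pre-pass in runOnFunction that drains a copy of RedoInsts, deleting trivially dead instructions and queueing any operands that become dead in turn, before the remaining instructions are reoptimized. A minimal, self-contained sketch of that worklist scheme (plain C++ on a toy node type, not the LLVM API):

    #include <set>
    #include <vector>

    struct Node {
      std::vector<Node *> Operands;
      unsigned Uses = 0; // how many other nodes still use this one
    };

    // Drop N, then queue any operand whose last use just disappeared.
    static void eraseDead(Node *N, std::set<Node *> &Worklist) {
      for (Node *Op : N->Operands)
        if (Op->Uses > 0 && --Op->Uses == 0)
          Worklist.insert(Op); // operand became trivially dead
      Worklist.erase(N);       // the real pass also erases the instruction itself
    }

    static void eraseAllDead(std::set<Node *> &Worklist) {
      while (!Worklist.empty())
        eraseDead(*Worklist.begin(), Worklist); // may add newly dead operands
    }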
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index db127c3..5d253be 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -428,30 +428,15 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
     // We should have never reached here if this argument isn't an gc value
     return BaseDefiningValueResult(I, true);

-  if (isa<GlobalVariable>(I))
-    // base case
+  if (isa<Constant>(I))
+    // We assume that objects with a constant base (e.g. a global) can't move
+    // and don't need to be reported to the collector because they are always
+    // live. All constants have constant bases. Besides global references, all
+    // kinds of constants (e.g. undef, constant expressions, null pointers) can
+    // be introduced by the inliner or the optimizer, especially on dynamically
+    // dead paths. See e.g. test4 in constants.ll.
     return BaseDefiningValueResult(I, true);

-  // inlining could possibly introduce phi node that contains
-  // undef if callee has multiple returns
-  if (isa<UndefValue>(I))
-    // utterly meaningless, but useful for dealing with
-    // partially optimized code.
-    return BaseDefiningValueResult(I, true);
-
-  // Due to inheritance, this must be _after_ the global variable and undef
-  // checks
-  if (isa<Constant>(I)) {
-    assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
-           "order of checks wrong!");
-    // Note: Even for frontends which don't have constant references, we can
-    // see constants appearing after optimizations. A simple example is
-    // specialization of an address computation on null feeding into a merge
-    // point where the actual use of the now-constant input is protected by
-    // another null check. (e.g. test4 in constants.ll)
-    return BaseDefiningValueResult(I, true);
-  }
-
   if (CastInst *CI = dyn_cast<CastInst>(I)) {
     Value *Def = CI->stripPointerCasts();
     // If stripping pointer casts changes the address space there is an
@@ -1642,33 +1627,24 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
                        DenseSet<Value *> &VisitedLiveValues) {

   for (User *U : GCRelocs) {
-    if (!isa<IntrinsicInst>(U))
+    GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
+    if (!Relocate)
       continue;

-    IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U);
-
-    // We only care about relocates
-    if (RelocatedValue->getIntrinsicID() !=
-        Intrinsic::experimental_gc_relocate) {
-      continue;
-    }
-
-    GCRelocateOperands RelocateOperands(RelocatedValue);
-    Value *OriginalValue =
-        const_cast<Value *>(RelocateOperands.getDerivedPtr());
+    Value *OriginalValue = const_cast<Value *>(Relocate->getDerivedPtr());
     assert(AllocaMap.count(OriginalValue));
     Value *Alloca = AllocaMap[OriginalValue];

     // Emit store into the related alloca
     // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to
     // the correct type according to alloca.
-    assert(RelocatedValue->getNextNode() &&
+    assert(Relocate->getNextNode() &&
            "Should always have one since it's not a terminator");
-    IRBuilder<> Builder(RelocatedValue->getNextNode());
+    IRBuilder<> Builder(Relocate->getNextNode());
     Value *CastedRelocatedValue =
-        Builder.CreateBitCast(RelocatedValue,
+        Builder.CreateBitCast(Relocate,
                               cast<AllocaInst>(Alloca)->getAllocatedType(),
-                              suffixed_name_or(RelocatedValue, ".casted", ""));
+                              suffixed_name_or(Relocate, ".casted", ""));

     StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
     Store->insertAfter(cast<Instruction>(CastedRelocatedValue));