Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/IPO/InferFunctionAttrs.cpp | 53
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 3
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCasts.cpp | 16
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineInternal.h | 2
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 29
-rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp | 5
-rw-r--r-- | lib/Transforms/Instrumentation/InstrProfiling.cpp | 10
-rw-r--r-- | lib/Transforms/Scalar/LICM.cpp | 115
-rw-r--r-- | lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 182
-rw-r--r-- | lib/Transforms/Scalar/MemCpyOptimizer.cpp | 98
-rw-r--r-- | lib/Transforms/Scalar/Reassociate.cpp | 34
-rw-r--r-- | lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 52
-rw-r--r-- | lib/Transforms/Utils/BypassSlowDivision.cpp | 100
-rw-r--r-- | lib/Transforms/Utils/Local.cpp | 47
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 66
-rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp | 78
-rw-r--r-- | lib/Transforms/Utils/ValueMapper.cpp | 10
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 12 |
18 files changed, 630 insertions, 282 deletions
diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp index d02c861..4295a75 100644 --- a/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -10,6 +10,7 @@ #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -21,10 +22,12 @@ using namespace llvm; STATISTIC(NumReadNone, "Number of functions inferred as readnone"); STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); +STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly"); STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); +STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns"); static bool setDoesNotAccessMemory(Function &F) { if (F.doesNotAccessMemory()) @@ -42,6 +45,15 @@ static bool setOnlyReadsMemory(Function &F) { return true; } +static bool setOnlyAccessesArgMemory(Function &F) { + if (F.onlyAccessesArgMemory()) + return false; + F.setOnlyAccessesArgMemory (); + ++NumArgMemOnly; + return true; +} + + static bool setDoesNotThrow(Function &F) { if (F.doesNotThrow()) return false; @@ -74,6 +86,17 @@ static bool setDoesNotAlias(Function &F, unsigned n) { return true; } +static bool setNonNull(Function &F, unsigned n) { + assert((n != AttributeSet::ReturnIndex || + F.getReturnType()->isPointerTy()) && + "nonnull applies only to pointers"); + if (F.getAttributes().hasAttribute(n, Attribute::NonNull)) + return false; + F.addAttribute(n, Attribute::NonNull); + ++NumNonNull; + return true; +} + /// Analyze the name and prototype of the given function and set any applicable /// attributes. 
/// @@ -89,7 +112,6 @@ static bool inferPrototypeAttributes(Function &F, return false; bool Changed = false; - switch (TheLibFunc) { case LibFunc::strlen: if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) @@ -873,6 +895,35 @@ static bool inferPrototypeAttributes(Function &F, Changed |= setDoesNotCapture(F, 2); return Changed; + case LibFunc::Znwj: // new(unsigned int) + case LibFunc::Znwm: // new(unsigned long) + case LibFunc::Znaj: // new[](unsigned int) + case LibFunc::Znam: // new[](unsigned long) + case LibFunc::msvc_new_int: // new(unsigned int) + case LibFunc::msvc_new_longlong: // new(unsigned long long) + case LibFunc::msvc_new_array_int: // new[](unsigned int) + case LibFunc::msvc_new_array_longlong: // new[](unsigned long long) + if (FTy->getNumParams() != 1) + return false; + // Operator new always returns a nonnull noalias pointer + Changed |= setNonNull(F, AttributeSet::ReturnIndex); + Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex); + return Changed; + + //TODO: add LibFunc entries for: + //case LibFunc::memset_pattern4: + //case LibFunc::memset_pattern8: + case LibFunc::memset_pattern16: + if (FTy->isVarArg() || FTy->getNumParams() != 3 || + !isa<PointerType>(FTy->getParamType(0)) || + !isa<PointerType>(FTy->getParamType(1)) || + !isa<IntegerType>(FTy->getParamType(2))) + return false; + + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + default: // FIXME: It'd be really nice to cover all the library functions we're // aware of here. diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index e3634f2..090245d 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1747,8 +1747,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Translate facts known about a pointer before relocating into // facts about the relocate value, while being careful to // preserve relocation semantics. - GCRelocateOperands Operands(II); - Value *DerivedPtr = Operands.getDerivedPtr(); + Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr(); auto *GCRelocateType = cast<PointerType>(II->getType()); // Remove the relocation if unused, note that this check is required diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index da835a1..0f01d18 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -591,19 +591,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // zext (x <s 0) to i32 --> x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. 
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || - (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) { + (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { if (!DoXform) return ICI; Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getScalarSizeInBits()-1); - In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); + In->getType()->getScalarSizeInBits() - 1); + In = Builder->CreateLShr(In, Sh, In->getName() + ".lobit"); if (In->getType() != CI.getType()) In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One, In->getName()+".not"); + In = Builder->CreateXor(In, One, In->getName() + ".not"); } return ReplaceInstUsesWith(CI, In); @@ -639,13 +639,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, return ReplaceInstUsesWith(CI, Res); } - uint32_t ShiftAmt = KnownZeroMask.logBase2(); + uint32_t ShAmt = KnownZeroMask.logBase2(); Value *In = ICI->getOperand(0); - if (ShiftAmt) { + if (ShAmt) { // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. - In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), - In->getName()+".lobit"); + In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShAmt), + In->getName() + ".lobit"); } if ((Op1CV != 0) == isNE) { // Toggle the low bit. diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 534f670..e4e5065 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -414,7 +414,7 @@ public: /// \brief A combiner-aware RAUW-like routine. /// /// This method is to be used when an instruction is found to be dead, - /// replacable with another preexisting expression. Here we add all uses of + /// replaceable with another preexisting expression. Here we add all uses of /// I to the worklist, replace all uses of I with the new value, then return /// I, so that the inst combiner will know that I was modified. Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index e25639a..54a9fbd 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -383,15 +383,28 @@ static void replaceExtractElements(InsertElementInst *InsElt, auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), ConstantVector::get(ExtendMask)); - // Replace all extracts from the original narrow vector with extracts from - // the new wide vector. - WideVec->insertBefore(ExtElt); + // Insert the new shuffle after the vector operand of the extract is defined + // or at the start of the basic block, so any subsequent extracts can use it. + bool ReplaceAllExtUsers; + if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) { + WideVec->insertAfter(ExtVecOpInst); + ReplaceAllExtUsers = true; + } else { + // TODO: Insert at start of function, so it's always safe to replace all? + IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); + ReplaceAllExtUsers = false; + } + + // Replace extracts from the original narrow vector with extracts from the new + // wide vector. 
for (User *U : ExtVecOp->users()) { - if (ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U)) { - auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); - NewExt->insertAfter(WideVec); - IC.ReplaceInstUsesWith(*OldExt, NewExt); - } + ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); + if (!OldExt || + (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent())) + continue; + auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); + NewExt->insertAfter(WideVec); + IC.ReplaceInstUsesWith(*OldExt, NewExt); } } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 7c46cfd..903a0b5 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3021,7 +3021,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, Instruction *Inst = &*--EndInst->getIterator(); if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); - if (Inst->isEHPad()) { + if (Inst->isEHPad() || Inst->getType()->isTokenTy()) { EndInst = Inst; continue; } @@ -3029,8 +3029,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, ++NumDeadInst; MadeIRChange = true; } - if (!Inst->getType()->isTokenTy()) - Inst->eraseFromParent(); + Inst->eraseFromParent(); } } diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 92e41ee..51ff95d 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -234,16 +234,14 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { } void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { - CoverageData->setSection(getCoverageSection()); - CoverageData->setAlignment(8); Constant *Init = CoverageData->getInitializer(); - // We're expecting { i32, i32, i32, i32, [n x { i8*, i32, i32 }], [m x i8] } + // We're expecting { [4 x 32], [n x { i8*, i32, i32 }], [m x i8] } // for some C. If not, the frontend's given us something broken. 
- assert(Init->getNumOperands() == 6 && "bad number of fields in coverage map"); - assert(isa<ConstantArray>(Init->getAggregateElement(4)) && + assert(Init->getNumOperands() == 3 && "bad number of fields in coverage map"); + assert(isa<ConstantArray>(Init->getAggregateElement(1)) && "invalid function list in coverage map"); - ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(4)); + ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(1)); for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) { Constant *Record = Records->getOperand(I); Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts(); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 6d70cdc..e01e23f 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -75,10 +75,12 @@ DisablePromotion("disable-licm-promotion", cl::Hidden, cl::desc("Disable memory promotion in LICM pass")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); -static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop); +static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, + const LICMSafetyInfo *SafetyInfo); static bool hoist(Instruction &I, BasicBlock *Preheader); static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, - const Loop *CurLoop, AliasSetTracker *CurAST ); + const Loop *CurLoop, AliasSetTracker *CurAST, + const LICMSafetyInfo *SafetyInfo); static bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, @@ -92,10 +94,10 @@ static bool isSafeToExecuteUnconditionally(const Instruction &Inst, static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST); -static Instruction *CloneInstructionInExitBlock(const Instruction &I, - BasicBlock &ExitBlock, - PHINode &PN, - const LoopInfo *LI); +static Instruction * +CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, + const LoopInfo *LI, + const LICMSafetyInfo *SafetyInfo); static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, @@ -348,10 +350,10 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. // - if (isNotUsedInLoop(I, CurLoop) && + if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) { ++II; - Changed |= sink(I, LI, DT, CurLoop, CurAST); + Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo); } } return Changed; @@ -432,6 +434,14 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) { for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); (I != E) && !SafetyInfo->MayThrow; ++I) SafetyInfo->MayThrow |= I->mayThrow(); + + // Compute funclet colors if we might sink/hoist in a function with a funclet + // personality routine. 
+ Function *Fn = CurLoop->getHeader()->getParent(); + if (Fn->hasPersonalityFn()) + if (Constant *PersonalityFn = Fn->getPersonalityFn()) + if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) + SafetyInfo->BlockColors = colorEHFunclets(*Fn); } /// canSinkOrHoistInst - Return true if the hoister and sinker can handle this @@ -466,6 +476,10 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, if (isa<DbgInfoIntrinsic>(I)) return false; + // Don't sink calls which can throw. + if (CI->mayThrow()) + return false; + // Handle simple cases by querying alias analysis. FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == FMRB_DoesNotAccessMemory) @@ -534,10 +548,24 @@ static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { /// the loop. If this is true, we can sink the instruction to the exit /// blocks of the loop. /// -static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) { +static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, + const LICMSafetyInfo *SafetyInfo) { + const auto &BlockColors = SafetyInfo->BlockColors; for (const User *U : I.users()) { const Instruction *UI = cast<Instruction>(U); if (const PHINode *PN = dyn_cast<PHINode>(UI)) { + const BasicBlock *BB = PN->getParent(); + // We cannot sink uses in catchswitches. + if (isa<CatchSwitchInst>(BB->getTerminator())) + return false; + + // We need to sink a callsite to a unique funclet. Avoid sinking if the + // phi use is too muddled. + if (isa<CallInst>(I)) + if (!BlockColors.empty() && + BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) + return false; + // A PHI node where all of the incoming values are this instruction are // special -- they can just be RAUW'ed with the instruction and thus // don't require a use in the predecessor. This is a particular important @@ -565,11 +593,41 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) { return true; } -static Instruction *CloneInstructionInExitBlock(const Instruction &I, - BasicBlock &ExitBlock, - PHINode &PN, - const LoopInfo *LI) { - Instruction *New = I.clone(); +static Instruction * +CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, + const LoopInfo *LI, + const LICMSafetyInfo *SafetyInfo) { + Instruction *New; + if (auto *CI = dyn_cast<CallInst>(&I)) { + const auto &BlockColors = SafetyInfo->BlockColors; + + // Sinking call-sites need to be handled differently from other + // instructions. The cloned call-site needs a funclet bundle operand + // appropriate for it's location in the CFG. 
+ SmallVector<OperandBundleDef, 1> OpBundles; + for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); + BundleIdx != BundleEnd; ++BundleIdx) { + OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); + if (Bundle.getTagID() == LLVMContext::OB_funclet) + continue; + + OpBundles.emplace_back(Bundle); + } + + if (!BlockColors.empty()) { + const ColorVector &CV = BlockColors.find(&ExitBlock)->second; + assert(CV.size() == 1 && "non-unique color for exit block!"); + BasicBlock *BBColor = CV.front(); + Instruction *EHPad = BBColor->getFirstNonPHI(); + if (EHPad->isEHPad()) + OpBundles.emplace_back("funclet", EHPad); + } + + New = CallInst::Create(CI, OpBundles); + } else { + New = I.clone(); + } + ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); if (!I.getName().empty()) New->setName(I.getName() + ".le"); @@ -601,7 +659,8 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I, /// position, and may either delete it or move it to outside of the loop. /// static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, - const Loop *CurLoop, AliasSetTracker *CurAST ) { + const Loop *CurLoop, AliasSetTracker *CurAST, + const LICMSafetyInfo *SafetyInfo) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); bool Changed = false; if (isa<LoadInst>(I)) ++NumMovedLoads; @@ -652,7 +711,7 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, New = It->second; else New = SunkCopies[ExitBlock] = - CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI); + CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo); PN->replaceAllUsesWith(New); PN->eraseFromParent(); @@ -950,6 +1009,21 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, if (!GuaranteedToExecute) return Changed; + // Figure out the loop exits and their insertion points, if this is the + // first promotion. + if (ExitBlocks.empty()) { + CurLoop->getUniqueExitBlocks(ExitBlocks); + InsertPts.clear(); + InsertPts.reserve(ExitBlocks.size()); + for (BasicBlock *ExitBlock : ExitBlocks) + InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); + } + + // Can't insert into a catchswitch. + for (BasicBlock *ExitBlock : ExitBlocks) + if (isa<CatchSwitchInst>(ExitBlock->getTerminator())) + return Changed; + // Otherwise, this is safe to promote, lets do it! DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); Changed = true; @@ -961,15 +1035,6 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); - // Figure out the loop exits and their insertion points, if this is the - // first promotion. - if (ExitBlocks.empty()) { - CurLoop->getUniqueExitBlocks(ExitBlocks); - InsertPts.resize(ExitBlocks.size()); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - InsertPts[i] = &*ExitBlocks[i]->getFirstInsertionPt(); - } - // We use the SSAUpdater interface to insert phi nodes as required. 
SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 2d577de..4521640 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -108,7 +108,11 @@ public: private: typedef SmallVector<StoreInst *, 8> StoreList; - StoreList StoreRefs; + StoreList StoreRefsForMemset; + StoreList StoreRefsForMemcpy; + bool HasMemset; + bool HasMemsetPattern; + bool HasMemcpy; /// \name Countable Loop Idiom Handling /// @{ @@ -118,17 +122,15 @@ private: SmallVectorImpl<BasicBlock *> &ExitBlocks); void collectStores(BasicBlock *BB); - bool isLegalStore(StoreInst *SI); + bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy); bool processLoopStore(StoreInst *SI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, - unsigned StoreAlignment, Value *SplatValue, + unsigned StoreAlignment, Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool NegStride); - bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, - const SCEVAddRecExpr *StoreEv, - const SCEV *BECount, bool NegStride); + bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount); /// @} /// \name Noncountable Loop Idiom Handling @@ -207,8 +209,13 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { *CurLoop->getHeader()->getParent()); DL = &CurLoop->getHeader()->getModule()->getDataLayout(); - if (SE->hasLoopInvariantBackedgeTakenCount(L)) - return runOnCountableLoop(); + HasMemset = TLI->has(LibFunc::memset); + HasMemsetPattern = TLI->has(LibFunc::memset_pattern16); + HasMemcpy = TLI->has(LibFunc::memcpy); + + if (HasMemset || HasMemsetPattern || HasMemcpy) + if (SE->hasLoopInvariantBackedgeTakenCount(L)) + return runOnCountableLoop(); return runOnNoncountableLoop(); } @@ -297,7 +304,8 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C)); } -bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) { +bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, + bool &ForMemcpy) { // Don't touch volatile stores. if (!SI->isSimple()) return false; @@ -322,22 +330,86 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) { if (!isa<SCEVConstant>(StoreEv->getOperand(1))) return false; - return true; + // See if the store can be turned into a memset. + + // If the stored value is a byte-wise value (like i32 -1), then it may be + // turned into a memset of i8 -1, assuming that all the consecutive bytes + // are stored. A store of i32 0x01020304 can never be turned into a memset, + // but it can be turned into memset_pattern if the target supports it. + Value *SplatValue = isBytewiseValue(StoredVal); + Constant *PatternValue = nullptr; + + // If we're allowed to form a memset, and the stored value would be + // acceptable for memset, use it. + if (HasMemset && SplatValue && + // Verify that the stored value is loop invariant. If not, we can't + // promote the memset. + CurLoop->isLoopInvariant(SplatValue)) { + // It looks like we can use SplatValue. + ForMemset = true; + return true; + } else if (HasMemsetPattern && + // Don't create memset_pattern16s with address spaces. 
+ StorePtr->getType()->getPointerAddressSpace() == 0 && + (PatternValue = getMemSetPatternValue(StoredVal, DL))) { + // It looks like we can use PatternValue! + ForMemset = true; + return true; + } + + // Otherwise, see if the store can be turned into a memcpy. + if (HasMemcpy) { + // Check to see if the stride matches the size of the store. If so, then we + // know that every byte is touched in the loop. + unsigned Stride = getStoreStride(StoreEv); + unsigned StoreSize = getStoreSizeInBytes(SI, DL); + if (StoreSize != Stride && StoreSize != -Stride) + return false; + + // The store must be feeding a non-volatile load. + LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); + if (!LI || !LI->isSimple()) + return false; + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided load. If we have something else, it's a + // random load we can't handle. + const SCEVAddRecExpr *LoadEv = + dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + + // The store and load must share the same stride. + if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) + return false; + + // Success. This store can be converted into a memcpy. + ForMemcpy = true; + return true; + } + // This store can't be transformed into a memset/memcpy. + return false; } void LoopIdiomRecognize::collectStores(BasicBlock *BB) { - StoreRefs.clear(); + StoreRefsForMemset.clear(); + StoreRefsForMemcpy.clear(); for (Instruction &I : *BB) { StoreInst *SI = dyn_cast<StoreInst>(&I); if (!SI) continue; + bool ForMemset = false; + bool ForMemcpy = false; // Make sure this is a strided store with a constant stride. - if (!isLegalStore(SI)) + if (!isLegalStore(SI, ForMemset, ForMemcpy)) continue; // Save the store locations. - StoreRefs.push_back(SI); + if (ForMemset) + StoreRefsForMemset.push_back(SI); + else if (ForMemcpy) + StoreRefsForMemcpy.push_back(SI); } } @@ -357,9 +429,15 @@ bool LoopIdiomRecognize::runOnLoopBlock( bool MadeChange = false; // Look for store instructions, which may be optimized to memset/memcpy. collectStores(BB); - for (auto &SI : StoreRefs) + + // Look for a single store which can be optimized into a memset. + for (auto &SI : StoreRefsForMemset) MadeChange |= processLoopStore(SI, BECount); + // Optimize the store into a memcpy, if it feeds an similarly strided load. + for (auto &SI : StoreRefsForMemcpy) + MadeChange |= processLoopStoreOfLoopLoad(SI, BECount); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { Instruction *Inst = &*I++; // Look for memset instructions, which may be optimized to a larger memset. @@ -380,7 +458,7 @@ bool LoopIdiomRecognize::runOnLoopBlock( return MadeChange; } -/// processLoopStore - See if this store can be promoted to a memset or memcpy. +/// processLoopStore - See if this store can be promoted to a memset. bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { assert(SI->isSimple() && "Expected only non-volatile stores."); @@ -398,12 +476,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { bool NegStride = StoreSize == -Stride; // See if we can optimize just this store in isolation. - if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), - StoredVal, SI, StoreEv, BECount, NegStride)) - return true; - - // Optimize the store into a memcpy, if it feeds an similarly strided load. 
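To make the new memcpy legality checks above concrete, here is a hand-written C++ sketch of loops that do and do not qualify (illustrative only, not code from this change; it assumes hypothetical int *dst, int *src and an unsigned trip count n):

    // Qualifies for the StoreRefsForMemcpy path: the store stride equals the
    // 4-byte store size, and the feeding load runs over the same stride.
    for (unsigned i = 0; i != n; ++i)
      dst[i] = src[i];

    // Does not qualify: only every other element is written, so the stride
    // (2 * sizeof(int)) no longer matches the store size and not every byte
    // in the range is touched.
    for (unsigned i = 0; i != n; i += 2)
      dst[i] = src[i];
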
- return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride); + return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), + StoredVal, SI, StoreEv, BECount, NegStride); } /// processLoopMemSet - See if this memset can be promoted to a large memset. @@ -440,8 +514,14 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, if (!Stride || MSI->getLength() != Stride->getValue()) return false; + // Verify that the memset value is loop invariant. If not, we can't promote + // the memset. + Value *SplatValue = MSI->getValue(); + if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue)) + return false; + return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, - MSI->getAlignment(), MSI->getValue(), MSI, Ev, + MSI->getAlignment(), SplatValue, MSI, Ev, BECount, /*NegStride=*/false); } @@ -496,37 +576,19 @@ bool LoopIdiomRecognize::processLoopStridedStore( Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool NegStride) { - - // If the stored value is a byte-wise value (like i32 -1), then it may be - // turned into a memset of i8 -1, assuming that all the consecutive bytes - // are stored. A store of i32 0x01020304 can never be turned into a memset, - // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = nullptr; - unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); - // If we're allowed to form a memset, and the stored value would be acceptable - // for memset, use it. - if (SplatValue && TLI->has(LibFunc::memset) && - // Verify that the stored value is loop invariant. If not, we can't - // promote the memset. - CurLoop->isLoopInvariant(SplatValue)) { - // Keep and use SplatValue. - PatternValue = nullptr; - } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) && - (PatternValue = getMemSetPatternValue(StoredVal, DL))) { - // Don't create memset_pattern16s with address spaces. - // It looks like we can use PatternValue! - SplatValue = nullptr; - } else { - // Otherwise, this isn't an idiom we can transform. For example, we can't - // do anything with a 3-byte store. - return false; - } + if (!SplatValue) + PatternValue = getMemSetPatternValue(StoredVal, DL); + + assert((SplatValue || PatternValue) && + "Expected either splat value or pattern value."); // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. This allows us to insert code for it in the preheader. + unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE, *DL, "loop-idiom"); @@ -608,29 +670,25 @@ bool LoopIdiomRecognize::processLoopStridedStore( /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. This kicks in for stuff like /// for (i) A[i] = B[i]; -bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( - StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv, - const SCEV *BECount, bool NegStride) { - // If we're not allowed to form memcpy, we fail. 
- if (!TLI->has(LibFunc::memcpy)) - return false; +bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, + const SCEV *BECount) { + assert(SI->isSimple() && "Expected only non-volatile stores."); + + Value *StorePtr = SI->getPointerOperand(); + const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); + unsigned Stride = getStoreStride(StoreEv); + unsigned StoreSize = getStoreSizeInBytes(SI, DL); + bool NegStride = StoreSize == -Stride; // The store must be feeding a non-volatile load. - LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); - if (!LI || !LI->isSimple()) - return false; + LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); + assert(LI->isSimple() && "Expected only non-volatile stores."); // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided load. If we have something else, it's a // random load we can't handle. const SCEVAddRecExpr *LoadEv = - dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); - if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) - return false; - - // The store and load must share the same stride. - if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) - return false; + cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0333bf2..7354016 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -481,6 +481,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, return AMemSet; } +static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, + const LoadInst *LI) { + unsigned StoreAlign = SI->getAlignment(); + if (!StoreAlign) + StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); + unsigned LoadAlign = LI->getAlignment(); + if (!LoadAlign) + LoadAlign = DL.getABITypeAlignment(LI->getType()); + + return std::min(StoreAlign, LoadAlign); +} bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!SI->isSimple()) return false; @@ -496,12 +507,84 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { const DataLayout &DL = SI->getModule()->getDataLayout(); - // Detect cases where we're performing call slot forwarding, but - // happen to be using a load-store pair to implement it, rather than - // a memcpy. + // Load to store forwarding can be interpreted as memcpy. if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { + + auto *T = LI->getType(); + if (T->isAggregateType()) { + AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + MemoryLocation LoadLoc = MemoryLocation::get(LI); + + // We use alias analysis to check if an instruction may store to + // the memory we load from in between the load and the store. If + // such an instruction is found, we try to promote there instead + // of at the store position. + Instruction *P = SI; + for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator(); + I != E; ++I) { + if (!(AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod)) + continue; + + // We found an instruction that may write to the loaded memory. 
+ // We can try to promote at this position instead of the store + // position if nothing alias the store memory after this. + P = &*I; + for (; I != E; ++I) { + MemoryLocation StoreLoc = MemoryLocation::get(SI); + if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) { + DEBUG(dbgs() << "Alias " << *I << "\n"); + P = nullptr; + break; + } + } + + break; + } + + // If a valid insertion position is found, then we can promote + // the load/store pair to a memcpy. + if (P) { + // If we load from memory that may alias the memory we store to, + // memmove must be used to preserve semantic. If not, memcpy can + // be used. + bool UseMemMove = false; + if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc)) + UseMemMove = true; + + unsigned Align = findCommonAlignment(DL, SI, LI); + uint64_t Size = DL.getTypeStoreSize(T); + + IRBuilder<> Builder(P); + Instruction *M; + if (UseMemMove) + M = Builder.CreateMemMove(SI->getPointerOperand(), + LI->getPointerOperand(), Size, + Align, SI->isVolatile()); + else + M = Builder.CreateMemCpy(SI->getPointerOperand(), + LI->getPointerOperand(), Size, + Align, SI->isVolatile()); + + DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI + << " => " << *M << "\n"); + + MD->removeInstruction(SI); + SI->eraseFromParent(); + MD->removeInstruction(LI); + LI->eraseFromParent(); + ++NumMemCpyInstr; + + // Make sure we do not invalidate the iterator. + BBI = M->getIterator(); + return true; + } + } + + // Detect cases where we're performing call slot forwarding, but + // happen to be using a load-store pair to implement it, rather than + // a memcpy. MemDepResult ldep = MD->getDependency(LI); CallInst *C = nullptr; if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) @@ -522,18 +605,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { } if (C) { - unsigned storeAlign = SI->getAlignment(); - if (!storeAlign) - storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); - unsigned loadAlign = LI->getAlignment(); - if (!loadAlign) - loadAlign = DL.getABITypeAlignment(LI->getType()); - bool changed = performCallSlotOptzn( LI, SI->getPointerOperand()->stripPointerCasts(), LI->getPointerOperand()->stripPointerCasts(), DL.getTypeStoreSize(SI->getOperand(0)->getType()), - std::min(storeAlign, loadAlign), C); + findCommonAlignment(DL, SI, LI), C); if (changed) { MD->removeInstruction(SI); SI->eraseFromParent(); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index fb970c7..401a740 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -183,6 +183,8 @@ namespace { Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); Value *RemoveFactorFromExpression(Value *V, Value *Factor); void EraseInst(Instruction *I); + void RecursivelyEraseDeadInsts(Instruction *I, + SetVector<AssertingVH<Instruction>> &Insts); void OptimizeInst(Instruction *I); Instruction *canonicalizeNegConstExpr(Instruction *I); }; @@ -1926,6 +1928,22 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, return nullptr; } +// Remove dead instructions and if any operands are trivially dead add them to +// Insts so they will be removed as well. 
+void Reassociate::RecursivelyEraseDeadInsts( + Instruction *I, SetVector<AssertingVH<Instruction>> &Insts) { + assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!"); + SmallVector<Value *, 4> Ops(I->op_begin(), I->op_end()); + ValueRankMap.erase(I); + Insts.remove(I); + RedoInsts.remove(I); + I->eraseFromParent(); + for (auto Op : Ops) + if (Instruction *OpInst = dyn_cast<Instruction>(Op)) + if (OpInst->use_empty()) + Insts.insert(OpInst); +} + /// Zap the given instruction, adding interesting operands to the work list. void Reassociate::EraseInst(Instruction *I) { assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!"); @@ -2255,7 +2273,21 @@ bool Reassociate::runOnFunction(Function &F) { ++II; } - // If this produced extra instructions to optimize, handle them now. + // Make a copy of all the instructions to be redone so we can remove dead + // instructions. + SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts); + // Iterate over all instructions to be reevaluated and remove trivially dead + // instructions. If any operand of the trivially dead instruction becomes + // dead mark it for deletion as well. Continue this process until all + // trivially dead instructions have been removed. + while (!ToRedo.empty()) { + Instruction *I = ToRedo.pop_back_val(); + if (isInstructionTriviallyDead(I)) + RecursivelyEraseDeadInsts(I, ToRedo); + } + + // Now that we have removed dead instructions, we can reoptimize the + // remaining instructions. while (!RedoInsts.empty()) { Instruction *I = RedoInsts.pop_back_val(); if (isInstructionTriviallyDead(I)) diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index db127c3..5d253be 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -428,30 +428,15 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { // We should have never reached here if this argument isn't an gc value return BaseDefiningValueResult(I, true); - if (isa<GlobalVariable>(I)) - // base case + if (isa<Constant>(I)) + // We assume that objects with a constant base (e.g. a global) can't move + // and don't need to be reported to the collector because they are always + // live. All constants have constant bases. Besides global references, all + // kinds of constants (e.g. undef, constant expressions, null pointers) can + // be introduced by the inliner or the optimizer, especially on dynamically + // dead paths. See e.g. test4 in constants.ll. return BaseDefiningValueResult(I, true); - // inlining could possibly introduce phi node that contains - // undef if callee has multiple returns - if (isa<UndefValue>(I)) - // utterly meaningless, but useful for dealing with - // partially optimized code. - return BaseDefiningValueResult(I, true); - - // Due to inheritance, this must be _after_ the global variable and undef - // checks - if (isa<Constant>(I)) { - assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && - "order of checks wrong!"); - // Note: Even for frontends which don't have constant references, we can - // see constants appearing after optimizations. A simple example is - // specialization of an address computation on null feeding into a merge - // point where the actual use of the now-constant input is protected by - // another null check. (e.g. 
test4 in constants.ll) - return BaseDefiningValueResult(I, true); - } - if (CastInst *CI = dyn_cast<CastInst>(I)) { Value *Def = CI->stripPointerCasts(); // If stripping pointer casts changes the address space there is an @@ -1642,33 +1627,24 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs, DenseSet<Value *> &VisitedLiveValues) { for (User *U : GCRelocs) { - if (!isa<IntrinsicInst>(U)) + GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U); + if (!Relocate) continue; - IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U); - - // We only care about relocates - if (RelocatedValue->getIntrinsicID() != - Intrinsic::experimental_gc_relocate) { - continue; - } - - GCRelocateOperands RelocateOperands(RelocatedValue); - Value *OriginalValue = - const_cast<Value *>(RelocateOperands.getDerivedPtr()); + Value *OriginalValue = const_cast<Value *>(Relocate->getDerivedPtr()); assert(AllocaMap.count(OriginalValue)); Value *Alloca = AllocaMap[OriginalValue]; // Emit store into the related alloca // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to // the correct type according to alloca. - assert(RelocatedValue->getNextNode() && + assert(Relocate->getNextNode() && "Should always have one since it's not a terminator"); - IRBuilder<> Builder(RelocatedValue->getNextNode()); + IRBuilder<> Builder(Relocate->getNextNode()); Value *CastedRelocatedValue = - Builder.CreateBitCast(RelocatedValue, + Builder.CreateBitCast(Relocate, cast<AllocaInst>(Alloca)->getAllocatedType(), - suffixed_name_or(RelocatedValue, ".casted", "")); + suffixed_name_or(Relocate, ".casted", "")); StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca); Store->insertAfter(cast<Instruction>(CastedRelocatedValue)); diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 0914699..42287d3 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -74,17 +74,13 @@ namespace llvm { // insertFastDiv - Substitutes the div/rem instruction with code that checks the // value of the operands and uses a shorter-faster div/rem instruction when // possible and the longer-slower div/rem instruction otherwise. 
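Conceptually, the guarded control flow that insertFastDiv emits roughly corresponds to this hand-written C++ sketch (assuming an unsigned 64-bit division bypassed at a 32-bit width; illustrative only, not code from this change):

    #include <cstdint>

    uint64_t bypassedUDiv(uint64_t a, uint64_t b) {
      if (((a | b) >> 32) == 0) {
        // Fast path: both operands fit in 32 bits, so use the short divide.
        return (uint32_t)a / (uint32_t)b;
      }
      // Slow path: fall back to the full-width divide.
      return a / b;
    }

The remainder case is handled the same way, and the fast and slow results are merged with phi nodes, which is what the QuoPhi/RemPhi code in the hunks below builds.
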
-static bool insertFastDiv(Function &F, - Function::iterator &I, - BasicBlock::iterator &J, - IntegerType *BypassType, - bool UseDivOp, - bool UseSignedOp, +static bool insertFastDiv(Instruction *I, IntegerType *BypassType, + bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache) { + Function *F = I->getParent()->getParent(); // Get instruction operands - Instruction *Instr = &*J; - Value *Dividend = Instr->getOperand(0); - Value *Divisor = Instr->getOperand(1); + Value *Dividend = I->getOperand(0); + Value *Divisor = I->getOperand(1); if (isa<ConstantInt>(Divisor) || (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { @@ -94,13 +90,12 @@ static bool insertFastDiv(Function &F, } // Basic Block is split before divide - BasicBlock *MainBB = &*I; - BasicBlock *SuccessorBB = I->splitBasicBlock(J); - ++I; //advance iterator I to successorBB + BasicBlock *MainBB = &*I->getParent(); + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); // Add new basic block for slow divide operation - BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", - MainBB->getParent(), SuccessorBB); + BasicBlock *SlowBB = + BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); SlowBB->moveBefore(SuccessorBB); IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); Value *SlowQuotientV; @@ -115,8 +110,8 @@ static bool insertFastDiv(Function &F, SlowBuilder.CreateBr(SuccessorBB); // Add new basic block for fast divide operation - BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", - MainBB->getParent(), SuccessorBB); + BasicBlock *FastBB = + BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); FastBB->moveBefore(SlowBB); IRBuilder<> FastBuilder(FastBB, FastBB->begin()); Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, @@ -139,19 +134,19 @@ static bool insertFastDiv(Function &F, // Phi nodes for result of div and rem IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); - PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); QuoPhi->addIncoming(SlowQuotientV, SlowBB); QuoPhi->addIncoming(FastQuotientV, FastBB); - PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); RemPhi->addIncoming(SlowRemainderV, SlowBB); RemPhi->addIncoming(FastRemainderV, FastBB); - // Replace Instr with appropriate phi node + // Replace I with appropriate phi node if (UseDivOp) - Instr->replaceAllUsesWith(QuoPhi); + I->replaceAllUsesWith(QuoPhi); else - Instr->replaceAllUsesWith(RemPhi); - Instr->eraseFromParent(); + I->replaceAllUsesWith(RemPhi); + I->eraseFromParent(); // Combine operands into a single value with OR for value testing below MainBB->getInstList().back().eraseFromParent(); @@ -168,9 +163,6 @@ static bool insertFastDiv(Function &F, Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); - // point iterator J at first instruction of successorBB - J = I->begin(); - // Cache phi nodes to be used later in place of other instances // of div or rem with the same sign, dividend, and divisor DivOpInfo Key(UseSignedOp, Dividend, Divisor); @@ -179,57 +171,54 @@ static bool insertFastDiv(Function &F, return true; } -// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if -// operands and operation are identical. Otherwise call insertFastDiv to perform -// the optimization and cache the resulting dividend and remainder. 
-static bool reuseOrInsertFastDiv(Function &F, - Function::iterator &I, - BasicBlock::iterator &J, - IntegerType *BypassType, - bool UseDivOp, - bool UseSignedOp, +// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from +// the current BB if operands and operation are identical. Otherwise calls +// insertFastDiv to perform the optimization and caches the resulting dividend +// and remainder. +static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, + bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache) { // Get instruction operands - Instruction *Instr = &*J; - DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); + DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1)); DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); if (CacheI == PerBBDivCache.end()) { // If previous instance does not exist, insert fast div - return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, - PerBBDivCache); + return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); } // Replace operation value with previously generated phi node DivPhiNodes &Value = CacheI->second; if (UseDivOp) { // Replace all uses of div instruction with quotient phi node - J->replaceAllUsesWith(Value.Quotient); + I->replaceAllUsesWith(Value.Quotient); } else { // Replace all uses of rem instruction with remainder phi node - J->replaceAllUsesWith(Value.Remainder); + I->replaceAllUsesWith(Value.Remainder); } - // Advance to next operation - ++J; - // Remove redundant operation - Instr->eraseFromParent(); + I->eraseFromParent(); return true; } -// bypassSlowDivision - This optimization identifies DIV instructions that can -// be profitably bypassed and carried out with a shorter, faster divide. -bool llvm::bypassSlowDivision(Function &F, - Function::iterator &I, - const DenseMap<unsigned int, unsigned int> &BypassWidths) { +// bypassSlowDivision - This optimization identifies DIV instructions in a BB +// that can be profitably bypassed and carried out with a shorter, faster +// divide. +bool llvm::bypassSlowDivision( + BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) { DivCacheTy DivCache; bool MadeChange = false; - for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { + Instruction* Next = &*BB->begin(); + while (Next != nullptr) { + // We may add instructions immediately after I, but we want to skip over + // them. 
+ Instruction* I = Next; + Next = Next->getNextNode(); // Get instruction details - unsigned Opcode = J->getOpcode(); + unsigned Opcode = I->getOpcode(); bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; bool UseSignedOp = Opcode == Instruction::SDiv || @@ -240,11 +229,11 @@ bool llvm::bypassSlowDivision(Function &F, continue; // Skip division on vector types, only optimize integer instructions - if (!J->getType()->isIntegerTy()) + if (!I->getType()->isIntegerTy()) continue; // Get bitwidth of div/rem instruction - IntegerType *T = cast<IntegerType>(J->getType()); + IntegerType *T = cast<IntegerType>(I->getType()); unsigned int bitwidth = T->getBitWidth(); // Continue if bitwidth is not bypassed @@ -253,10 +242,9 @@ bool llvm::bypassSlowDivision(Function &F, continue; // Get type for div/rem instruction with bypass bitwidth - IntegerType *BT = IntegerType::get(J->getContext(), BI->second); + IntegerType *BT = IntegerType::get(I->getContext(), BI->second); - MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, - UseSignedOp, DivCache); + MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache); } return MadeChange; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index e75163f..0e386ac 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1305,8 +1305,9 @@ static bool markAliveBlocks(Function &F, } } - // Turn invokes that call 'nounwind' functions into ordinary calls. - if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + TerminatorInst *Terminator = BB->getTerminator(); + if (auto *II = dyn_cast<InvokeInst>(Terminator)) { + // Turn invokes that call 'nounwind' functions into ordinary calls. Value *Callee = II->getCalledValue(); if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { changeToUnreachable(II, true); @@ -1321,6 +1322,44 @@ static bool markAliveBlocks(Function &F, changeToCall(II); Changed = true; } + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) { + // Remove catchpads which cannot be reached. + struct CatchPadDenseMapInfo { + static CatchPadInst *getEmptyKey() { + return DenseMapInfo<CatchPadInst *>::getEmptyKey(); + } + static CatchPadInst *getTombstoneKey() { + return DenseMapInfo<CatchPadInst *>::getTombstoneKey(); + } + static unsigned getHashValue(CatchPadInst *CatchPad) { + return static_cast<unsigned>(hash_combine_range( + CatchPad->value_op_begin(), CatchPad->value_op_end())); + } + static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) { + if (LHS == getEmptyKey() || LHS == getTombstoneKey() || + RHS == getEmptyKey() || RHS == getTombstoneKey()) + return LHS == RHS; + return LHS->isIdenticalTo(RHS); + } + }; + + // Set of unique CatchPads. 
+ SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4, + CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>> + HandlerSet; + detail::DenseSetEmpty Empty; + for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(), + E = CatchSwitch->handler_end(); + I != E; ++I) { + BasicBlock *HandlerBB = *I; + auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI()); + if (!HandlerSet.insert({CatchPad, Empty}).second) { + CatchSwitch->removeHandler(I); + --I; + --E; + Changed = true; + } + } } Changed |= ConstantFoldTerminator(BB, true); @@ -1514,8 +1553,8 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { return true; // Check if the function is specifically marked as a gc leaf function. - // - // TODO: we should be checking the attributes on the call site as well. + if (CS.hasFnAttr("gc-leaf-function")) + return true; if (const Function *F = CS.getCalledFunction()) return F->hasFnAttribute("gc-leaf-function"); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index d0932f83..3bb3fa5 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -3448,18 +3449,26 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break; if (BBI->mayHaveSideEffects()) { - if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (auto *SI = dyn_cast<StoreInst>(BBI)) { if (SI->isVolatile()) break; - } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + } else if (auto *LI = dyn_cast<LoadInst>(BBI)) { if (LI->isVolatile()) break; - } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { + } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { if (RMWI->isVolatile()) break; - } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { + } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { if (CXI->isVolatile()) break; + } else if (isa<CatchPadInst>(BBI)) { + // A catchpad may invoke exception object constructors and such, which + // in some languages can be arbitrary code, so be conservative by + // default. + // For CoreCLR, it just involves a type test, so can be removed. 
+ if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) != + EHPersonality::CoreCLR) + break; } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) && !isa<LandingPadInst>(BBI)) { break; @@ -3485,7 +3494,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { for (unsigned i = 0, e = Preds.size(); i != e; ++i) { TerminatorInst *TI = Preds[i]->getTerminator(); IRBuilder<> Builder(TI); - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (auto *BI = dyn_cast<BranchInst>(TI)) { if (BI->isUnconditional()) { if (BI->getSuccessor(0) == BB) { new UnreachableInst(TI->getContext(), TI); @@ -3502,7 +3511,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { Changed = true; } } - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) if (i.getCaseSuccessor() == BB) { @@ -3511,18 +3520,49 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { --i; --e; Changed = true; } - } else if ((isa<InvokeInst>(TI) && - cast<InvokeInst>(TI)->getUnwindDest() == BB) || - isa<CatchSwitchInst>(TI)) { - removeUnwindEdge(TI->getParent()); - Changed = true; + } else if (auto *II = dyn_cast<InvokeInst>(TI)) { + if (II->getUnwindDest() == BB) { + removeUnwindEdge(TI->getParent()); + Changed = true; + } + } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) { + if (CSI->getUnwindDest() == BB) { + removeUnwindEdge(TI->getParent()); + Changed = true; + continue; + } + + for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(), + E = CSI->handler_end(); + I != E; ++I) { + if (*I == BB) { + CSI->removeHandler(I); + --I; + --E; + Changed = true; + } + } + if (CSI->getNumHandlers() == 0) { + BasicBlock *CatchSwitchBB = CSI->getParent(); + if (CSI->hasUnwindDest()) { + // Redirect preds to the unwind dest + CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest()); + } else { + // Rewrite all preds to unwind to caller (or from invoke to call). + SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB)); + for (BasicBlock *EHPred : EHPreds) + removeUnwindEdge(EHPred); + } + // The catchswitch is no longer reachable. + new UnreachableInst(CSI->getContext(), CSI); + CSI->eraseFromParent(); + Changed = true; + } } else if (isa<CleanupReturnInst>(TI)) { new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } - // TODO: We can remove a catchswitch if all it's catchpads end in - // unreachable. } // If this block is now dead, remove it. diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 81dea6d..dc5fee5 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -57,8 +57,7 @@ static bool ignoreCallingConv(LibFunc::Func Func) { Func == LibFunc::llabs || Func == LibFunc::strlen; } -/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the -/// value is equal or not-equal to zero. +/// Return true if it only matters that the value is equal or not-equal to zero. static bool isOnlyUsedInZeroEqualityComparison(Value *V) { for (User *U : V->users()) { if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) @@ -72,8 +71,7 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) { return true; } -/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality -/// comparisons with With. +/// Return true if it is only used in equality comparisons with With. 
 static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
   for (User *U : V->users()) {
     if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -249,12 +247,12 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
       !FT->getParamType(2)->isIntegerTy())
     return nullptr;

-  // Extract some information from the instruction
+  // Extract some information from the instruction.
   Value *Dst = CI->getArgOperand(0);
   Value *Src = CI->getArgOperand(1);
   uint64_t Len;

-  // We don't do anything if length is not constant
+  // We don't do anything if length is not constant.
   if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
     Len = LengthArg->getZExtValue();
   else
@@ -272,12 +270,12 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
   if (SrcLen == 0 || Len == 0)
     return Dst;

-  // We don't optimize this case
+  // We don't optimize this case.
   if (Len < SrcLen)
     return nullptr;

   // strncat(x, s, c) -> strcat(x, s)
-  // s is constant so the strcat can be optimized further
+  // s is constant so the strcat can be optimized further.
   return emitStrLenMemCpy(Src, Dst, SrcLen, B);
 }

@@ -310,7 +308,8 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
   StringRef Str;
   if (!getConstantStringInfo(SrcStr, Str)) {
     if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
-      return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+      return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI),
+                         "strchr");
     return nullptr;
   }

@@ -490,8 +489,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {

   Type *PT = Callee->getFunctionType()->getParamType(0);
   Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
-  Value *DstEnd =
-      B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
+  Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst,
+                              ConstantInt::get(DL.getIntPtrType(PT), Len - 1));

   // We have enough information to now generate the memcpy call to do the
   // copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -599,7 +598,8 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
   if (I == StringRef::npos) // No match.
     return Constant::getNullValue(CI->getType());

-  return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+  return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I),
+                     "strpbrk");
 }

 // strpbrk(s, "a") -> strchr(s, 'a')
@@ -878,8 +878,10 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
     Type *RHSPtrTy =
         IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());

-    Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
-    Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+    Value *LHSV =
+        B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
+    Value *RHSV =
+        B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
     return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
   }

@@ -992,6 +994,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
   Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
   if (V == nullptr)
     return nullptr;
+
+  // Propagate fast-math flags from the existing call to the new call.
+  IRBuilder<>::FastMathFlagGuard Guard(B);
+  B.SetFastMathFlags(CI->getFastMathFlags());

   // floor((double)floatval) -> (double)floorf(floatval)
   if (Callee->isIntrinsic()) {
@@ -1027,6 +1033,10 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
   if (V2 == nullptr)
     return nullptr;

+  // Propagate fast-math flags from the existing call to the new call.
+  IRBuilder<>::FastMathFlagGuard Guard(B);
+  B.SetFastMathFlags(CI->getFastMathFlags());
+
   // fmin((double)floatval1, (double)floatval2)
   //   -> (double)fminf(floatval1, floatval2)
   // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP().
@@ -1117,7 +1127,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
                                 Callee->getAttributes());
   }

-  bool unsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
+  bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());

   // pow(exp(x), y) -> exp(x*y)
   // pow(exp2(x), y) -> exp2(x * y)
@@ -1126,7 +1136,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
   // underflow behavior quite dramatically.
   // Example: x = 1000, y = 0.001.
   // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
-  if (unsafeFPMath) {
+  if (UnsafeFPMath) {
     if (auto *OpC = dyn_cast<CallInst>(Op1)) {
       IRBuilder<>::FastMathFlagGuard Guard(B);
       FastMathFlags FMF;
@@ -1157,7 +1167,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
                       LibFunc::fabsl)) {

     // In -ffast-math, pow(x, 0.5) -> sqrt(x).
-    if (unsafeFPMath)
+    if (UnsafeFPMath)
       return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
                                   Callee->getAttributes());

@@ -1183,7 +1193,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
     return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");

   // In -ffast-math, generate repeated fmul instead of generating pow(x, n).
-  if (unsafeFPMath) {
+  if (UnsafeFPMath) {
     APFloat V = abs(Op2C->getValueAPF());
     // We limit to a max of 7 fmul(s). Thus max exponent is 32.
     // This transformation applies to integer exponents only.
@@ -1291,12 +1301,9 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
   // function, do that first.
   Function *Callee = CI->getCalledFunction();
   StringRef Name = Callee->getName();
-  if ((Name == "fmin" && hasFloatVersion(Name)) ||
-      (Name == "fmax" && hasFloatVersion(Name))) {
-    Value *Ret = optimizeBinaryDoubleFP(CI, B);
-    if (Ret)
+  if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
+    if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
       return Ret;
-  }

   // Make sure this has 2 arguments of FP type which match the result type.
   FunctionType *FT = Callee->getFunctionType();
@@ -1307,14 +1314,12 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {

   IRBuilder<>::FastMathFlagGuard Guard(B);
   FastMathFlags FMF;
-  Function *F = CI->getParent()->getParent();
-  if (canUseUnsafeFPMath(F)) {
+  if (CI->hasUnsafeAlgebra()) {
     // Unsafe algebra sets all fast-math-flags to true.
     FMF.setUnsafeAlgebra();
   } else {
     // At a minimum, no-nans-fp-math must be true.
-    Attribute Attr = F->getFnAttribute("no-nans-fp-math");
-    if (Attr.getValueAsString() != "true")
+    if (!CI->hasNoNaNs())
       return nullptr;
     // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
     // "Ideally, fmax would be sensitive to the sign of zero, for example
@@ -2169,7 +2174,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
   LibFunc::Func Func;
   Function *Callee = CI->getCalledFunction();
   StringRef FuncName = Callee->getName();
-  IRBuilder<> Builder(CI);
+
+  SmallVector<OperandBundleDef, 2> OpBundles;
+  CI->getOperandBundlesAsDefs(OpBundles);
+  IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
   bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;

   // Command-line parameter overrides function attribute.
@@ -2419,7 +2427,8 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
   return false;
 }

-Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
+                                                     IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();

   if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk))
@@ -2433,7 +2442,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &
   return nullptr;
 }

-Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
+                                                      IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();

   if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk))
@@ -2447,7 +2457,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<>
   return nullptr;
 }

-Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
+                                                     IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();

   if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk))
@@ -2539,7 +2550,10 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
   LibFunc::Func Func;
   Function *Callee = CI->getCalledFunction();
   StringRef FuncName = Callee->getName();
-  IRBuilder<> Builder(CI);
+
+  SmallVector<OperandBundleDef, 2> OpBundles;
+  CI->getOperandBundlesAsDefs(OpBundles);
+  IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
   bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;

   // First, check that this is a known library functions.
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 1add78e..2e361d3 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -218,12 +218,12 @@ static Metadata *mapMetadataOp(Metadata *Op,
 }

 /// Resolve uniquing cycles involving the given metadata.
-static void resolveCycles(Metadata *MD, bool MDMaterialized) {
+static void resolveCycles(Metadata *MD, bool AllowTemps) {
   if (auto *N = dyn_cast_or_null<MDNode>(MD)) {
-    if (!MDMaterialized && N->isTemporary())
+    if (AllowTemps && N->isTemporary())
       return;
     if (!N->isResolved())
-      N->resolveCycles(MDMaterialized);
+      N->resolveCycles(AllowTemps);
   }
 }

@@ -253,7 +253,7 @@ static bool remapOperands(MDNode &Node,
       // Resolve uniquing cycles underneath distinct nodes on the fly so they
       // don't infect later operands.
       if (IsDistinct)
-        resolveCycles(New, !(Flags & RF_HaveUnmaterializedMetadata));
+        resolveCycles(New, Flags & RF_HaveUnmaterializedMetadata);
     }
   }

@@ -401,7 +401,7 @@ Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
     return NewMD;

   // Resolve cycles involving the entry metadata.
-  resolveCycles(NewMD, !(Flags & RF_HaveUnmaterializedMetadata));
+  resolveCycles(NewMD, Flags & RF_HaveUnmaterializedMetadata);

   // Remap the operands of distinct MDNodes.
   while (!DistinctWorklist.empty())
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a627dd6..2c0d317 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4294,12 +4294,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         continue;
       }

-      if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop,
-                                               Reductions[Phi])) {
-        if (Reductions[Phi].hasUnsafeAlgebra())
-          Requirements->addUnsafeAlgebraInst(
-              Reductions[Phi].getUnsafeAlgebraInst());
-        AllowedExit.insert(Reductions[Phi].getLoopExitInstr());
+      RecurrenceDescriptor RedDes;
+      if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) {
+        if (RedDes.hasUnsafeAlgebra())
+          Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
+        AllowedExit.insert(RedDes.getLoopExitInstr());
+        Reductions[Phi] = RedDes;
         continue;
       }
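The SimplifyLibCalls hunks above follow a single pattern when a libcall is rewritten: the replacement call keeps the original call site's operand bundles (seeded into the IRBuilder in optimizeCall) and its fast-math flags (copied into the builder under a FastMathFlagGuard). The following minimal sketch shows that pattern in isolation; the helper name rewriteFAbsPreservingState and the choice of fabs as the rewritten call are illustrative assumptions and not part of this patch, while the IRBuilder constructor and the accessors are the ones exercised in the diff.

// Sketch only: mirrors the bundle/FMF-preserving pattern used above.
// The helper name and the fabs example are hypothetical; the call is assumed
// to return a floating-point value so fast-math flags are meaningful on it.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static Value *rewriteFAbsPreservingState(CallInst *CI) {
  // Collect the original call's operand bundles and seed the builder with
  // them, so any CreateCall issued below re-attaches them automatically.
  SmallVector<OperandBundleDef, 2> OpBundles;
  CI->getOperandBundlesAsDefs(OpBundles);
  IRBuilder<> B(CI, /*FPMathTag=*/nullptr, OpBundles);

  // Copy the call's fast-math flags into the builder; the guard restores the
  // builder's previous flags when it goes out of scope.
  IRBuilder<>::FastMathFlagGuard Guard(B);
  B.SetFastMathFlags(CI->getFastMathFlags());

  // Emit llvm.fabs.* in place of the libcall; bundles and fast-math flags
  // carry over through the builder state set up above.
  Module *M = CI->getModule();
  Function *FAbs = Intrinsic::getDeclaration(M, Intrinsic::fabs, CI->getType());
  return B.CreateCall(FAbs, {CI->getArgOperand(0)}, "fabs");
}

Seeding the builder once in optimizeCall, rather than threading bundles through every individual simplifier, is the design choice the patch takes; each per-function optimizer can keep calling B.CreateCall and B.CreateGEP unchanged.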