diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 16 | ||||
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp | 72 | ||||
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/Local.cpp | 31 | ||||
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 4 | ||||
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp | 2 | ||||
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 101 | ||||
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 165 | ||||
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 13 |
8 files changed, 240 insertions, 164 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index a5137e9..72db980 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -626,11 +626,17 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, Clone2->setName(Twine("lpad") + Suffix2); NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); - // Create a PHI node for the two cloned landingpad instructions. - PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); - PN->addIncoming(Clone1, NewBB1); - PN->addIncoming(Clone2, NewBB2); - LPad->replaceAllUsesWith(PN); + // Create a PHI node for the two cloned landingpad instructions only + // if the original landingpad instruction has some uses. + if (!LPad->use_empty()) { + assert(!LPad->getType()->isTokenTy() && + "Split cannot be applied if LPad is token type. Otherwise an " + "invalid PHINode of token type would be created."); + PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); + PN->addIncoming(Clone1, NewBB1); + PN->addIncoming(Clone2, NewBB2); + LPad->replaceAllUsesWith(PN); + } LPad->eraseFromParent(); } else { // There is no second clone. Just replace the landing pad with the first diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 854a3b8..6454afb 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -266,27 +266,14 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - CloningDirector *Director; - ValueMapTypeRemapper *TypeMapper; - ValueMaterializer *Materializer; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, - const char *nameSuffix, ClonedCodeInfo *codeInfo, - CloningDirector *Director) + const char *nameSuffix, ClonedCodeInfo *codeInfo) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), - CodeInfo(codeInfo), Director(Director) { - // These are optional components. The Director may return null. - if (Director) { - TypeMapper = Director->getTypeRemapper(); - Materializer = Director->getValueMaterializer(); - } else { - TypeMapper = nullptr; - Materializer = nullptr; - } - } + CodeInfo(codeInfo) {} /// The specified block is found to be reachable, clone it and /// anything that it can reach. @@ -332,23 +319,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // loop doesn't include the terminator. for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; ++II) { - // If the "Director" remaps the instruction, don't clone it. - if (Director) { - CloningDirector::CloningAction Action = - Director->handleInstruction(VMap, &*II, NewBB); - // If the cloning director says stop, we want to stop everything, not - // just break out of the loop (which would cause the terminator to be - // cloned). The cloning director is responsible for inserting a proper - // terminator into the new basic block in this case. - if (Action == CloningDirector::StopCloningBB) - return; - // If the cloning director says skip, continue to the next instruction. - // In this case, the cloning director is responsible for mapping the - // skipped instruction to some value that is defined in the new - // basic block. - if (Action == CloningDirector::SkipInstruction) - continue; - } Instruction *NewInst = II->clone(); @@ -356,8 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // nodes for which we defer processing until we update the CFG. if (!isa<PHINode>(NewInst)) { RemapInstruction(NewInst, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into @@ -397,26 +366,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Finally, clone over the terminator. const TerminatorInst *OldTI = BB->getTerminator(); bool TerminatorDone = false; - if (Director) { - CloningDirector::CloningAction Action - = Director->handleInstruction(VMap, OldTI, NewBB); - // If the cloning director says stop, we want to stop everything, not - // just break out of the loop (which would cause the terminator to be - // cloned). The cloning director is responsible for inserting a proper - // terminator into the new basic block in this case. - if (Action == CloningDirector::StopCloningBB) - return; - if (Action == CloningDirector::CloneSuccessors) { - // If the director says to skip with a terminate instruction, we still - // need to clone this block's successors. - const TerminatorInst *TI = NewBB->getTerminator(); - for (const BasicBlock *Succ : TI->successors()) - ToClone.push_back(Succ); - return; - } - assert(Action != CloningDirector::SkipInstruction && - "SkipInstruction is not valid for terminators."); - } if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { if (BI->isConditional()) { // If the condition was a known constant in the callee... @@ -485,19 +434,13 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo, - CloningDirector *Director) { + const char *NameSuffix, + ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; ValueMaterializer *Materializer = nullptr; - if (Director) { - TypeMapper = Director->getTypeRemapper(); - Materializer = Director->getValueMaterializer(); - } - #ifndef NDEBUG // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. @@ -507,7 +450,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - NameSuffix, CodeInfo, Director); + NameSuffix, CodeInfo); const BasicBlock *StartingBB; if (StartingInst) StartingBB = StartingInst->getParent(); @@ -731,8 +674,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ClonedCodeInfo *CodeInfo, Instruction *TheCall) { CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, - ModuleLevelChanges, Returns, NameSuffix, CodeInfo, - nullptr); + ModuleLevelChanges, Returns, NameSuffix, CodeInfo); } /// \brief Remaps instructions in \p Blocks using the mapping in \p VMap. diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 0e386ac..d2793e5 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -1051,9 +1052,31 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); - if (ExtendedArg) - Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, + if (ExtendedArg) { + // We're now only describing a subset of the variable. The piece we're + // describing will always be smaller than the variable size, because + // VariableSize == Size of Alloca described by DDI. Since SI stores + // to the alloca described by DDI, if it's first operand is an extend, + // we're guaranteed that before extension, the value was narrower than + // the size of the alloca, hence the size of the described variable. + SmallVector<uint64_t, 3> NewDIExpr; + unsigned PieceOffset = 0; + // If this already is a bit piece, we drop the bit piece from the expression + // and record the offset. + if (DIExpr->isBitPiece()) { + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()-3); + PieceOffset = DIExpr->getBitPieceOffset(); + } else { + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); + } + NewDIExpr.push_back(dwarf::DW_OP_bit_piece); + NewDIExpr.push_back(PieceOffset); //Offset + const DataLayout &DL = DDI->getModule()->getDataLayout(); + NewDIExpr.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size + Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, + Builder.createExpression(NewDIExpr), DDI->getDebugLoc(), SI); + } else Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, DDI->getDebugLoc(), SI); @@ -1407,7 +1430,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB) { /// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. -bool llvm::removeUnreachableBlocks(Function &F) { +bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { SmallPtrSet<BasicBlock*, 128> Reachable; bool Changed = markAliveBlocks(F, Reachable); @@ -1428,6 +1451,8 @@ bool llvm::removeUnreachableBlocks(Function &F) { ++SI) if (Reachable.count(*SI)) (*SI)->removePredecessor(&*BB); + if (LVI) + LVI->eraseBlock(&*BB); BB->dropAllReferences(); } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 2499b88..eea9237 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -528,7 +528,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, Loop *OuterL = L->getParentLoop(); // Update LoopInfo if the loop is completely removed. if (CompletelyUnroll) - LI->updateUnloop(L);; + LI->markAsRemoved(L); // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify @@ -542,7 +542,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after - // LoopInfo's been updated by updateUnloop. + // LoopInfo's been updated by markAsRemoved. Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp index e038805..fa958e9 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -599,7 +599,7 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, IRBuilder<>::FastMathFlagGuard FMFG(Builder); FastMathFlags FMF; FMF.setUnsafeAlgebra(); - Builder.SetFastMathFlags(FMF); + Builder.setFastMathFlags(FMF); Value *Cmp; if (RK == MRK_FloatMin || RK == MRK_FloatMax) diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3bb3fa5..3125a2c 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -141,6 +141,8 @@ class SimplifyCFGOpt { bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool SimplifySingleResume(ResumeInst *RI); + bool SimplifyCommonResume(ResumeInst *RI); bool SimplifyCleanupReturn(CleanupReturnInst *RI); bool SimplifyUnreachable(UnreachableInst *UI); bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); @@ -3239,14 +3241,101 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, } bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { - // If this is a trivial landing pad that just continues unwinding the caught - // exception then zap the landing pad, turning its invokes into calls. + if (isa<PHINode>(RI->getValue())) + return SimplifyCommonResume(RI); + else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) && + RI->getValue() == RI->getParent()->getFirstNonPHI()) + // The resume must unwind the exception that caused control to branch here. + return SimplifySingleResume(RI); + + return false; +} + +// Simplify resume that is shared by several landing pads (phi of landing pad). +bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { + BasicBlock *BB = RI->getParent(); + + // Check that there are no other instructions except for debug intrinsics + // between the phi of landing pads (RI->getValue()) and resume instruction. + BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(), + E = RI->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + SmallSet<BasicBlock *, 4> TrivialUnwindBlocks; + auto *PhiLPInst = cast<PHINode>(RI->getValue()); + + // Check incoming blocks to see if any of them are trivial. + for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); + Idx != End; Idx++) { + auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx); + auto *IncomingValue = PhiLPInst->getIncomingValue(Idx); + + // If the block has other successors, we can not delete it because + // it has other dependents. + if (IncomingBB->getUniqueSuccessor() != BB) + continue; + + auto *LandingPad = + dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI()); + // Not the landing pad that caused the control to branch here. + if (IncomingValue != LandingPad) + continue; + + bool isTrivial = true; + + I = IncomingBB->getFirstNonPHI()->getIterator(); + E = IncomingBB->getTerminator()->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) { + isTrivial = false; + break; + } + + if (isTrivial) + TrivialUnwindBlocks.insert(IncomingBB); + } + + // If no trivial unwind blocks, don't do any simplifications. + if (TrivialUnwindBlocks.empty()) return false; + + // Turn all invokes that unwind here into calls. + for (auto *TrivialBB : TrivialUnwindBlocks) { + // Blocks that will be simplified should be removed from the phi node. + // Note there could be multiple edges to the resume block, and we need + // to remove them all. + while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1) + BB->removePredecessor(TrivialBB, true); + + for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB); + PI != PE;) { + BasicBlock *Pred = *PI++; + removeUnwindEdge(Pred); + } + + // In each SimplifyCFG run, only the current processed block can be erased. + // Otherwise, it will break the iteration of SimplifyCFG pass. So instead + // of erasing TrivialBB, we only remove the branch to the common resume + // block so that we can later erase the resume block since it has no + // predecessors. + TrivialBB->getTerminator()->eraseFromParent(); + new UnreachableInst(RI->getContext(), TrivialBB); + } + + // Delete the resume block if all its predecessors have been removed. + if (pred_empty(BB)) + BB->eraseFromParent(); + + return !TrivialUnwindBlocks.empty(); +} + +// Simplify resume that is only used by a single (non-phi) landing pad. +bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); - if (RI->getValue() != LPInst) - // Not a landing pad, or the resume is not unwinding the exception that - // caused control to branch here. - return false; + assert (RI->getValue() == LPInst && + "Resume must unwind the exception that caused control to here"); // Check that there are no other instructions except for debug intrinsics. BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator(); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index dc5fee5..dc07440 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -997,7 +997,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(CI->getFastMathFlags()); + B.setFastMathFlags(CI->getFastMathFlags()); // floor((double)floatval) -> (double)floorf(floatval) if (Callee->isIntrinsic()) { @@ -1035,7 +1035,7 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(CI->getFastMathFlags()); + B.setFastMathFlags(CI->getFastMathFlags()); // fmin((double)floatval1, (double)floatval2) // -> (double)fminf(floatval1, floatval2) @@ -1127,29 +1127,26 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Callee->getAttributes()); } + // FIXME: Use instruction-level FMF. bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent()); - // pow(exp(x), y) -> exp(x*y) + // pow(exp(x), y) -> exp(x * y) // pow(exp2(x), y) -> exp2(x * y) - // We enable these only under fast-math. Besides rounding - // differences the transformation changes overflow and - // underflow behavior quite dramatically. + // We enable these only with fast-math. Besides rounding differences, the + // transformation changes overflow and underflow behavior quite dramatically. // Example: x = 1000, y = 0.001. // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). - if (UnsafeFPMath) { - if (auto *OpC = dyn_cast<CallInst>(Op1)) { + auto *OpC = dyn_cast<CallInst>(Op1); + if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) { + LibFunc::Func Func; + Function *OpCCallee = OpC->getCalledFunction(); + if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && + TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) { IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF; - FMF.setUnsafeAlgebra(); - B.SetFastMathFlags(FMF); - - LibFunc::Func Func; - Function *OpCCallee = OpC->getCalledFunction(); - if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && - TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) - return EmitUnaryFloatFnCall( - B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"), - OpCCallee->getName(), B, OpCCallee->getAttributes()); + B.setFastMathFlags(CI->getFastMathFlags()); + Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); + return EmitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, + OpCCallee->getAttributes()); } } @@ -1167,9 +1164,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { LibFunc::fabsl)) { // In -ffast-math, pow(x, 0.5) -> sqrt(x). - if (UnsafeFPMath) + if (CI->hasUnsafeAlgebra()) { + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, Callee->getAttributes()); + } // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow, and still handles negative zero @@ -1328,7 +1328,7 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { FMF.setNoSignedZeros(); FMF.setNoNaNs(); } - B.SetFastMathFlags(FMF); + B.setFastMathFlags(FMF); // We have a relaxed floating-point environment. We can ignore NaN-handling // and transform to a compare and select. We do not have to consider errno or @@ -1354,11 +1354,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { !FT->getParamType(0)->isFloatingPointTy()) return Ret; - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + if (!CI->hasUnsafeAlgebra()) return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); - if (!OpC) + + // The earlier call must also be unsafe in order to do these transforms. + if (!OpC || !OpC->hasUnsafeAlgebra()) return Ret; // log(pow(x,y)) -> y*log(x) @@ -1369,7 +1371,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; FMF.setUnsafeAlgebra(); - B.SetFastMathFlags(FMF); + B.setFastMathFlags(FMF); LibFunc::Func Func; Function *F = OpC->getCalledFunction(); @@ -1397,66 +1399,67 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + + if (!CI->hasUnsafeAlgebra()) return Ret; - Value *Op = CI->getArgOperand(0); - if (Instruction *I = dyn_cast<Instruction>(Op)) { - if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) { - // We're looking for a repeated factor in a multiplication tree, - // so we can do this fold: sqrt(x * x) -> fabs(x); - // or this fold: sqrt(x * x * y) -> fabs(x) * sqrt(y). - Value *Op0 = I->getOperand(0); - Value *Op1 = I->getOperand(1); - Value *RepeatOp = nullptr; - Value *OtherOp = nullptr; - if (Op0 == Op1) { - // Simple match: the operands of the multiply are identical. - RepeatOp = Op0; - } else { - // Look for a more complicated pattern: one of the operands is itself - // a multiply, so search for a common factor in that multiply. - // Note: We don't bother looking any deeper than this first level or for - // variations of this pattern because instcombine's visitFMUL and/or the - // reassociation pass should give us this form. - Value *OtherMul0, *OtherMul1; - if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { - // Pattern: sqrt((x * y) * z) - if (OtherMul0 == OtherMul1) { - // Matched: sqrt((x * x) * z) - RepeatOp = OtherMul0; - OtherOp = Op1; - } - } - } - if (RepeatOp) { - // Fast math flags for any created instructions should match the sqrt - // and multiply. - // FIXME: We're not checking the sqrt because it doesn't have - // fast-math-flags (see earlier comment). - IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(I->getFastMathFlags()); - // If we found a repeated factor, hoist it out of the square root and - // replace it with the fabs of that factor. - Module *M = Callee->getParent(); - Type *ArgType = Op->getType(); - Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); - Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); - if (OtherOp) { - // If we found a non-repeated factor, we still need to get its square - // root. We then multiply that by the value that was simplified out - // of the square root calculation. - Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); - Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); - return B.CreateFMul(FabsCall, SqrtCall); - } - return FabsCall; + Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); + if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + return Ret; + + // We're looking for a repeated factor in a multiplication tree, + // so we can do this fold: sqrt(x * x) -> fabs(x); + // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y). + Value *Op0 = I->getOperand(0); + Value *Op1 = I->getOperand(1); + Value *RepeatOp = nullptr; + Value *OtherOp = nullptr; + if (Op0 == Op1) { + // Simple match: the operands of the multiply are identical. + RepeatOp = Op0; + } else { + // Look for a more complicated pattern: one of the operands is itself + // a multiply, so search for a common factor in that multiply. + // Note: We don't bother looking any deeper than this first level or for + // variations of this pattern because instcombine's visitFMUL and/or the + // reassociation pass should give us this form. + Value *OtherMul0, *OtherMul1; + if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { + // Pattern: sqrt((x * y) * z) + if (OtherMul0 == OtherMul1 && + cast<Instruction>(Op0)->hasUnsafeAlgebra()) { + // Matched: sqrt((x * x) * z) + RepeatOp = OtherMul0; + OtherOp = Op1; } } } - return Ret; -} + if (!RepeatOp) + return Ret; + // Fast math flags for any created instructions should match the sqrt + // and multiply. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(I->getFastMathFlags()); + + // If we found a repeated factor, hoist it out of the square root and + // replace it with the fabs of that factor. + Module *M = Callee->getParent(); + Type *ArgType = I->getType(); + Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); + Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); + if (OtherOp) { + // If we found a non-repeated factor, we still need to get its square + // root. We then multiply that by the value that was simplified out + // of the square root calculation. + Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); + Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); + return B.CreateFMul(FabsCall, SqrtCall); + } + return FabsCall; +} + +// TODO: Generalize to handle any trig function and its inverse. Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1471,13 +1474,15 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { !FT->getParamType(0)->isFloatingPointTy()) return Ret; - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) - return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); if (!OpC) return Ret; + // Both calls must allow unsafe optimizations in order to remove them. + if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra()) + return Ret; + // tan(atan(x)) -> x // tanf(atanf(x)) -> x // tanl(atanl(x)) -> x diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 2e361d3..f47ddb9 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -222,8 +222,17 @@ static void resolveCycles(Metadata *MD, bool AllowTemps) { if (auto *N = dyn_cast_or_null<MDNode>(MD)) { if (AllowTemps && N->isTemporary()) return; - if (!N->isResolved()) - N->resolveCycles(AllowTemps); + if (!N->isResolved()) { + if (AllowTemps) + // Note that this will drop RAUW support on any temporaries, which + // blocks uniquing. If this ends up being an issue, in the future + // we can experiment with delaying resolving these nodes until + // after metadata is fully materialized (i.e. when linking metadata + // as a postpass after function importing). + N->resolveNonTemporaries(); + else + N->resolveCycles(); + } } } |