Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/IPO/InferFunctionAttrs.cpp            53
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp       3
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp      16
-rw-r--r--  lib/Transforms/InstCombine/InstCombineInternal.h      2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp  29
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp   5
-rw-r--r--  lib/Transforms/Instrumentation/InstrProfiling.cpp    10
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp                       115
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp         182
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp             98
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp                 34
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp     52
-rw-r--r--  lib/Transforms/Utils/BypassSlowDivision.cpp          100
-rw-r--r--  lib/Transforms/Utils/Local.cpp                        47
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp                  66
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp             78
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp                  10
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp            12
18 files changed, 630 insertions, 282 deletions
diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp
index d02c861..4295a75 100644
--- a/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -10,6 +10,7 @@
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@@ -21,10 +22,12 @@ using namespace llvm;
STATISTIC(NumReadNone, "Number of functions inferred as readnone");
STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+STATISTIC(NumNonNull, "Number of function returns inferred as nonnull");
static bool setDoesNotAccessMemory(Function &F) {
if (F.doesNotAccessMemory())
@@ -42,6 +45,15 @@ static bool setOnlyReadsMemory(Function &F) {
return true;
}
+static bool setOnlyAccessesArgMemory(Function &F) {
+ if (F.onlyAccessesArgMemory())
+ return false;
+ F.setOnlyAccessesArgMemory();
+ ++NumArgMemOnly;
+ return true;
+}
+
static bool setDoesNotThrow(Function &F) {
if (F.doesNotThrow())
return false;
@@ -74,6 +86,17 @@ static bool setDoesNotAlias(Function &F, unsigned n) {
return true;
}
+static bool setNonNull(Function &F, unsigned n) {
+ assert((n != AttributeSet::ReturnIndex ||
+ F.getReturnType()->isPointerTy()) &&
+ "nonnull applies only to pointers");
+ if (F.getAttributes().hasAttribute(n, Attribute::NonNull))
+ return false;
+ F.addAttribute(n, Attribute::NonNull);
+ ++NumNonNull;
+ return true;
+}
+
/// Analyze the name and prototype of the given function and set any applicable
/// attributes.
///
@@ -89,7 +112,6 @@ static bool inferPrototypeAttributes(Function &F,
return false;
bool Changed = false;
-
switch (TheLibFunc) {
case LibFunc::strlen:
if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
@@ -873,6 +895,35 @@ static bool inferPrototypeAttributes(Function &F,
Changed |= setDoesNotCapture(F, 2);
return Changed;
+ case LibFunc::Znwj: // new(unsigned int)
+ case LibFunc::Znwm: // new(unsigned long)
+ case LibFunc::Znaj: // new[](unsigned int)
+ case LibFunc::Znam: // new[](unsigned long)
+ case LibFunc::msvc_new_int: // new(unsigned int)
+ case LibFunc::msvc_new_longlong: // new(unsigned long long)
+ case LibFunc::msvc_new_array_int: // new[](unsigned int)
+ case LibFunc::msvc_new_array_longlong: // new[](unsigned long long)
+ if (FTy->getNumParams() != 1)
+ return false;
+ // Operator new always returns a nonnull noalias pointer
+ Changed |= setNonNull(F, AttributeSet::ReturnIndex);
+ Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex);
+ return Changed;
+
+ // TODO: add LibFunc entries for:
+ // case LibFunc::memset_pattern4:
+ // case LibFunc::memset_pattern8:
+ case LibFunc::memset_pattern16:
+ if (FTy->isVarArg() || FTy->getNumParams() != 3 ||
+ !isa<PointerType>(FTy->getParamType(0)) ||
+ !isa<PointerType>(FTy->getParamType(1)) ||
+ !isa<IntegerType>(FTy->getParamType(2)))
+ return false;
+
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+
default:
// FIXME: It'd be really nice to cover all the library functions we're
// aware of here.
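
Illustrative example (not part of the patch): once operator new is marked
nonnull, later passes can fold null checks against its result. A minimal
C++ sketch of the payoff:

    #include <new>

    int *make() {
      int *P = new int(42);    // throwing operator new: inferred nonnull
      if (P == nullptr)        // provably false after attribute inference
        return nullptr;        // dead code, removable by later passes
      return P;
    }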
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e3634f2..090245d 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1747,8 +1747,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Translate facts known about a pointer before relocating into
// facts about the relocate value, while being careful to
// preserve relocation semantics.
- GCRelocateOperands Operands(II);
- Value *DerivedPtr = Operands.getDerivedPtr();
+ Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
auto *GCRelocateType = cast<PointerType>(II->getType());
// Remove the relocation if unused, note that this check is required
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index da835a1..0f01d18 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -591,19 +591,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
- (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) {
+ (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
if (!DoXform) return ICI;
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getScalarSizeInBits()-1);
- In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
+ In->getType()->getScalarSizeInBits() - 1);
+ In = Builder->CreateLShr(In, Sh, In->getName() + ".lobit");
if (In->getType() != CI.getType())
In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/);
if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder->CreateXor(In, One, In->getName()+".not");
+ In = Builder->CreateXor(In, One, In->getName() + ".not");
}
return ReplaceInstUsesWith(CI, In);
@@ -639,13 +639,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
return ReplaceInstUsesWith(CI, Res);
}
- uint32_t ShiftAmt = KnownZeroMask.logBase2();
+ uint32_t ShAmt = KnownZeroMask.logBase2();
Value *In = ICI->getOperand(0);
- if (ShiftAmt) {
+ if (ShAmt) {
// Perform a logical shr by shiftamt.
// Insert the shift to put the result in the low bit.
- In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
- In->getName()+".lobit");
+ In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
+ In->getName() + ".lobit");
}
if ((Op1CV != 0) == isNE) { // Toggle the low bit.
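
Illustrative example (not part of the patch): the sign-bit rewrites named in
the comments above, at the source level:

    unsigned isNegative(int X) {
      return X < 0;    // zext(icmp slt X, 0)  ->  (unsigned)X >> 31
    }

    unsigned isNonNegative(int X) {
      return X > -1;   // zext(icmp sgt X, -1) ->  ((unsigned)X >> 31) ^ 1
    }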
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 534f670..e4e5065 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -414,7 +414,7 @@ public:
/// \brief A combiner-aware RAUW-like routine.
///
/// This method is to be used when an instruction is found to be dead,
- /// replacable with another preexisting expression. Here we add all uses of
+ /// replaceable with another preexisting expression. Here we add all uses of
/// I to the worklist, replace all uses of I with the new value, then return
/// I, so that the inst combiner will know that I was modified.
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index e25639a..54a9fbd 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -383,15 +383,28 @@ static void replaceExtractElements(InsertElementInst *InsElt,
auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
ConstantVector::get(ExtendMask));
- // Replace all extracts from the original narrow vector with extracts from
- // the new wide vector.
- WideVec->insertBefore(ExtElt);
+ // Insert the new shuffle after the vector operand of the extract is defined
+ // or at the start of the basic block, so any subsequent extracts can use it.
+ bool ReplaceAllExtUsers;
+ if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) {
+ WideVec->insertAfter(ExtVecOpInst);
+ ReplaceAllExtUsers = true;
+ } else {
+ // TODO: Insert at start of function, so it's always safe to replace all?
+ IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
+ ReplaceAllExtUsers = false;
+ }
+
+ // Replace extracts from the original narrow vector with extracts from the new
+ // wide vector.
for (User *U : ExtVecOp->users()) {
- if (ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U)) {
- auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
- NewExt->insertAfter(WideVec);
- IC.ReplaceInstUsesWith(*OldExt, NewExt);
- }
+ ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
+ if (!OldExt ||
+ (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent()))
+ continue;
+ auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
+ NewExt->insertAfter(WideVec);
+ IC.ReplaceInstUsesWith(*OldExt, NewExt);
}
}
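
Illustrative example (not part of the patch, written with clang/GCC vector
extensions): both narrow extracts below can be served by one widening
shuffle of A, provided the shuffle is inserted right after A's definition,
as the new code does:

    typedef float V2 __attribute__((vector_size(8)));
    typedef float V4 __attribute__((vector_size(16)));

    V4 widenLowTwo(V2 A, V4 B) {
      B[0] = A[0];   // insert(extract(A, 0)) uses the wide shuffle
      B[1] = A[1];   // insert(extract(A, 1)) reuses the same shuffle
      return B;
    }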
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7c46cfd..903a0b5 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3021,7 +3021,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
Instruction *Inst = &*--EndInst->getIterator();
if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (Inst->isEHPad()) {
+ if (Inst->isEHPad() || Inst->getType()->isTokenTy()) {
EndInst = Inst;
continue;
}
@@ -3029,8 +3029,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
++NumDeadInst;
MadeIRChange = true;
}
- if (!Inst->getType()->isTokenTy())
- Inst->eraseFromParent();
+ Inst->eraseFromParent();
}
}
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 92e41ee..51ff95d 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -234,16 +234,14 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
}
void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) {
- CoverageData->setSection(getCoverageSection());
- CoverageData->setAlignment(8);
Constant *Init = CoverageData->getInitializer();
- // We're expecting { i32, i32, i32, i32, [n x { i8*, i32, i32 }], [m x i8] }
+ // We're expecting { [4 x i32], [n x { i8*, i32, i32 }], [m x i8] }
// for some n and m. If not, the frontend's given us something broken.
- assert(Init->getNumOperands() == 6 && "bad number of fields in coverage map");
- assert(isa<ConstantArray>(Init->getAggregateElement(4)) &&
+ assert(Init->getNumOperands() == 3 && "bad number of fields in coverage map");
+ assert(isa<ConstantArray>(Init->getAggregateElement(1)) &&
"invalid function list in coverage map");
- ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(4));
+ ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(1));
for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) {
Constant *Record = Records->getOperand(I);
Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts();
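
For reference, a C-level sketch of the initializer layout the assertions now
expect (field names are hypothetical, array sizes shown as placeholders):

    #include <cstdint>

    struct CoverageRecord {        // { i8*, i32, i32 }
      const char *NamePtr;
      uint32_t NameSize;
      uint32_t DataSize;
    };

    struct CoverageData {
      uint32_t Header[4];          // was four separate i32 fields
      CoverageRecord Records[1];   // [n x { i8*, i32, i32 }]
      char CoverageMapping[1];     // [m x i8]
    };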
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 6d70cdc..e01e23f 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -75,10 +75,12 @@ DisablePromotion("disable-licm-promotion", cl::Hidden,
cl::desc("Disable memory promotion in LICM pass"));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
-static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop);
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo);
static bool hoist(Instruction &I, BasicBlock *Preheader);
static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
- const Loop *CurLoop, AliasSetTracker *CurAST );
+ const Loop *CurLoop, AliasSetTracker *CurAST,
+ const LICMSafetyInfo *SafetyInfo);
static bool isGuaranteedToExecute(const Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
@@ -92,10 +94,10 @@ static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
const AAMDNodes &AAInfo,
AliasSetTracker *CurAST);
-static Instruction *CloneInstructionInExitBlock(const Instruction &I,
- BasicBlock &ExitBlock,
- PHINode &PN,
- const LoopInfo *LI);
+static Instruction *
+CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
+ const LoopInfo *LI,
+ const LICMSafetyInfo *SafetyInfo);
static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
DominatorTree *DT, TargetLibraryInfo *TLI,
Loop *CurLoop, AliasSetTracker *CurAST,
@@ -348,10 +350,10 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.
//
- if (isNotUsedInLoop(I, CurLoop) &&
+ if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) {
++II;
- Changed |= sink(I, LI, DT, CurLoop, CurAST);
+ Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo);
}
}
return Changed;
@@ -432,6 +434,14 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
(I != E) && !SafetyInfo->MayThrow; ++I)
SafetyInfo->MayThrow |= I->mayThrow();
+
+ // Compute funclet colors if we might sink/hoist in a function with a funclet
+ // personality routine.
+ Function *Fn = CurLoop->getHeader()->getParent();
+ if (Fn->hasPersonalityFn())
+ if (Constant *PersonalityFn = Fn->getPersonalityFn())
+ if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)))
+ SafetyInfo->BlockColors = colorEHFunclets(*Fn);
}
/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
@@ -466,6 +476,10 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
if (isa<DbgInfoIntrinsic>(I))
return false;
+ // Don't sink calls which can throw.
+ if (CI->mayThrow())
+ return false;
+
// Handle simple cases by querying alias analysis.
FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == FMRB_DoesNotAccessMemory)
@@ -534,10 +548,24 @@ static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
/// the loop. If this is true, we can sink the instruction to the exit
/// blocks of the loop.
///
-static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) {
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo) {
+ const auto &BlockColors = SafetyInfo->BlockColors;
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
+ const BasicBlock *BB = PN->getParent();
+ // We cannot sink uses in catchswitches.
+ if (isa<CatchSwitchInst>(BB->getTerminator()))
+ return false;
+
+ // We need to sink a callsite to a unique funclet. Avoid sinking if the
+ // phi use is too muddled.
+ if (isa<CallInst>(I))
+ if (!BlockColors.empty() &&
+ BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
+ return false;
+
// A PHI node where all of the incoming values are this instruction are
// special -- they can just be RAUW'ed with the instruction and thus
// don't require a use in the predecessor. This is a particular important
@@ -565,11 +593,41 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) {
return true;
}
-static Instruction *CloneInstructionInExitBlock(const Instruction &I,
- BasicBlock &ExitBlock,
- PHINode &PN,
- const LoopInfo *LI) {
- Instruction *New = I.clone();
+static Instruction *
+CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
+ const LoopInfo *LI,
+ const LICMSafetyInfo *SafetyInfo) {
+ Instruction *New;
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ const auto &BlockColors = SafetyInfo->BlockColors;
+
+ // Sinking call-sites need to be handled differently from other
+ // instructions. The cloned call-site needs a funclet bundle operand
+ // appropriate for its location in the CFG.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
+ BundleIdx != BundleEnd; ++BundleIdx) {
+ OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
+ if (Bundle.getTagID() == LLVMContext::OB_funclet)
+ continue;
+
+ OpBundles.emplace_back(Bundle);
+ }
+
+ if (!BlockColors.empty()) {
+ const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
+ assert(CV.size() == 1 && "non-unique color for exit block!");
+ BasicBlock *BBColor = CV.front();
+ Instruction *EHPad = BBColor->getFirstNonPHI();
+ if (EHPad->isEHPad())
+ OpBundles.emplace_back("funclet", EHPad);
+ }
+
+ New = CallInst::Create(CI, OpBundles);
+ } else {
+ New = I.clone();
+ }
+
ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
if (!I.getName().empty()) New->setName(I.getName() + ".le");
@@ -601,7 +659,8 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I,
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
- const Loop *CurLoop, AliasSetTracker *CurAST ) {
+ const Loop *CurLoop, AliasSetTracker *CurAST,
+ const LICMSafetyInfo *SafetyInfo) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
bool Changed = false;
if (isa<LoadInst>(I)) ++NumMovedLoads;
@@ -652,7 +711,7 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
New = It->second;
else
New = SunkCopies[ExitBlock] =
- CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI);
+ CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo);
PN->replaceAllUsesWith(New);
PN->eraseFromParent();
@@ -950,6 +1009,21 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
if (!GuaranteedToExecute)
return Changed;
+ // Figure out the loop exits and their insertion points, if this is the
+ // first promotion.
+ if (ExitBlocks.empty()) {
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ InsertPts.clear();
+ InsertPts.reserve(ExitBlocks.size());
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+ }
+
+ // Can't insert into a catchswitch.
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (isa<CatchSwitchInst>(ExitBlock->getTerminator()))
+ return Changed;
+
// Otherwise, this is safe to promote, lets do it!
DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
Changed = true;
@@ -961,15 +1035,6 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// location is better than none.
DebugLoc DL = LoopUses[0]->getDebugLoc();
- // Figure out the loop exits and their insertion points, if this is the
- // first promotion.
- if (ExitBlocks.empty()) {
- CurLoop->getUniqueExitBlocks(ExitBlocks);
- InsertPts.resize(ExitBlocks.size());
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- InsertPts[i] = &*ExitBlocks[i]->getFirstInsertionPt();
- }
-
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
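
Illustrative example (not part of the patch): the situation the funclet
bookkeeping handles. Under a funclet-based personality such as MSVC C++ EH,
a call sunk out of a loop that lives inside a catch handler must be cloned
with an operand bundle naming that handler's funclet pad:

    int pureHelper();   // assume readnone, nounwind

    int f(int N) {
      int Last = 0;
      try {
        throw 0;
      } catch (int) {
        for (int I = 0; I < N; ++I)
          Last = pureHelper();   // loop-invariant call, sink candidate
      }
      return Last;               // only use is outside the loop
    }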
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 2d577de..4521640 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -108,7 +108,11 @@ public:
private:
typedef SmallVector<StoreInst *, 8> StoreList;
- StoreList StoreRefs;
+ StoreList StoreRefsForMemset;
+ StoreList StoreRefsForMemcpy;
+ bool HasMemset;
+ bool HasMemsetPattern;
+ bool HasMemcpy;
/// \name Countable Loop Idiom Handling
/// @{
@@ -118,17 +122,15 @@ private:
SmallVectorImpl<BasicBlock *> &ExitBlocks);
void collectStores(BasicBlock *BB);
- bool isLegalStore(StoreInst *SI);
+ bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
bool processLoopStore(StoreInst *SI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment, Value *SplatValue,
+ unsigned StoreAlignment, Value *StoredVal,
Instruction *TheStore, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride);
- bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
- const SCEVAddRecExpr *StoreEv,
- const SCEV *BECount, bool NegStride);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
/// @}
/// \name Noncountable Loop Idiom Handling
@@ -207,8 +209,13 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
*CurLoop->getHeader()->getParent());
DL = &CurLoop->getHeader()->getModule()->getDataLayout();
- if (SE->hasLoopInvariantBackedgeTakenCount(L))
- return runOnCountableLoop();
+ HasMemset = TLI->has(LibFunc::memset);
+ HasMemsetPattern = TLI->has(LibFunc::memset_pattern16);
+ HasMemcpy = TLI->has(LibFunc::memcpy);
+
+ if (HasMemset || HasMemsetPattern || HasMemcpy)
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop();
return runOnNoncountableLoop();
}
@@ -297,7 +304,8 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}
-bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
+bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
+ bool &ForMemcpy) {
// Don't touch volatile stores.
if (!SI->isSimple())
return false;
@@ -322,22 +330,86 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
return false;
- return true;
+ // See if the store can be turned into a memset.
+
+ // If the stored value is a byte-wise value (like i32 -1), then it may be
+ // turned into a memset of i8 -1, assuming that all the consecutive bytes
+ // are stored. A store of i32 0x01020304 can never be turned into a memset,
+ // but it can be turned into memset_pattern if the target supports it.
+ Value *SplatValue = isBytewiseValue(StoredVal);
+ Constant *PatternValue = nullptr;
+
+ // If we're allowed to form a memset, and the stored value would be
+ // acceptable for memset, use it.
+ if (HasMemset && SplatValue &&
+ // Verify that the stored value is loop invariant. If not, we can't
+ // promote the memset.
+ CurLoop->isLoopInvariant(SplatValue)) {
+ // It looks like we can use SplatValue.
+ ForMemset = true;
+ return true;
+ } else if (HasMemsetPattern &&
+ // Don't create memset_pattern16s with address spaces.
+ StorePtr->getType()->getPointerAddressSpace() == 0 &&
+ (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
+ // It looks like we can use PatternValue!
+ ForMemset = true;
+ return true;
+ }
+
+ // Otherwise, see if the store can be turned into a memcpy.
+ if (HasMemcpy) {
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ unsigned Stride = getStoreStride(StoreEv);
+ unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+ if (StoreSize != Stride && StoreSize != -Stride)
+ return false;
+
+ // The store must be feeding a non-volatile load.
+ LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ if (!LI || !LI->isSimple())
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided load. If we have something else, it's a
+ // random load we can't handle.
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
+ if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+ return false;
+
+ // The store and load must share the same stride.
+ if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
+ return false;
+
+ // Success. This store can be converted into a memcpy.
+ ForMemcpy = true;
+ return true;
+ }
+ // This store can't be transformed into a memset/memcpy.
+ return false;
}
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
- StoreRefs.clear();
+ StoreRefsForMemset.clear();
+ StoreRefsForMemcpy.clear();
for (Instruction &I : *BB) {
StoreInst *SI = dyn_cast<StoreInst>(&I);
if (!SI)
continue;
+ bool ForMemset = false;
+ bool ForMemcpy = false;
// Make sure this is a strided store with a constant stride.
- if (!isLegalStore(SI))
+ if (!isLegalStore(SI, ForMemset, ForMemcpy))
continue;
// Save the store locations.
- StoreRefs.push_back(SI);
+ if (ForMemset)
+ StoreRefsForMemset.push_back(SI);
+ else if (ForMemcpy)
+ StoreRefsForMemcpy.push_back(SI);
}
}
@@ -357,9 +429,15 @@ bool LoopIdiomRecognize::runOnLoopBlock(
bool MadeChange = false;
// Look for store instructions, which may be optimized to memset/memcpy.
collectStores(BB);
- for (auto &SI : StoreRefs)
+
+ // Look for a single store which can be optimized into a memset.
+ for (auto &SI : StoreRefsForMemset)
MadeChange |= processLoopStore(SI, BECount);
+ // Optimize the store into a memcpy, if it feeds a similarly strided load.
+ for (auto &SI : StoreRefsForMemcpy)
+ MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
+
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
// Look for memset instructions, which may be optimized to a larger memset.
@@ -380,7 +458,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(
return MadeChange;
}
-/// processLoopStore - See if this store can be promoted to a memset or memcpy.
+/// processLoopStore - See if this store can be promoted to a memset.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
assert(SI->isSimple() && "Expected only non-volatile stores.");
@@ -398,12 +476,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
bool NegStride = StoreSize == -Stride;
// See if we can optimize just this store in isolation.
- if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
- StoredVal, SI, StoreEv, BECount, NegStride))
- return true;
-
- // Optimize the store into a memcpy, if it feeds an similarly strided load.
- return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride);
+ return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+ StoredVal, SI, StoreEv, BECount, NegStride);
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
@@ -440,8 +514,14 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
if (!Stride || MSI->getLength() != Stride->getValue())
return false;
+ // Verify that the memset value is loop invariant. If not, we can't promote
+ // the memset.
+ Value *SplatValue = MSI->getValue();
+ if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
+ return false;
+
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), MSI->getValue(), MSI, Ev,
+ MSI->getAlignment(), SplatValue, MSI, Ev,
BECount, /*NegStride=*/false);
}
@@ -496,37 +576,19 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride) {
-
- // If the stored value is a byte-wise value (like i32 -1), then it may be
- // turned into a memset of i8 -1, assuming that all the consecutive bytes
- // are stored. A store of i32 0x01020304 can never be turned into a memset,
- // but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
- unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
- // If we're allowed to form a memset, and the stored value would be acceptable
- // for memset, use it.
- if (SplatValue && TLI->has(LibFunc::memset) &&
- // Verify that the stored value is loop invariant. If not, we can't
- // promote the memset.
- CurLoop->isLoopInvariant(SplatValue)) {
- // Keep and use SplatValue.
- PatternValue = nullptr;
- } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) &&
- (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
- // Don't create memset_pattern16s with address spaces.
- // It looks like we can use PatternValue!
- SplatValue = nullptr;
- } else {
- // Otherwise, this isn't an idiom we can transform. For example, we can't
- // do anything with a 3-byte store.
- return false;
- }
+ if (!SplatValue)
+ PatternValue = getMemSetPatternValue(StoredVal, DL);
+
+ assert((SplatValue || PatternValue) &&
+ "Expected either splat value or pattern value.");
// The trip count of the loop and the base pointer of the addrec SCEV is
// guaranteed to be loop invariant, which means that it should dominate the
// header. This allows us to insert code for it in the preheader.
+ unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
@@ -608,29 +670,25 @@ bool LoopIdiomRecognize::processLoopStridedStore(
/// If the stored value is a strided load in the same loop with the same stride
/// this may be transformable into a memcpy. This kicks in for stuff like
/// for (i) A[i] = B[i];
-bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
- StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
- const SCEV *BECount, bool NegStride) {
- // If we're not allowed to form memcpy, we fail.
- if (!TLI->has(LibFunc::memcpy))
- return false;
+bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
+ const SCEV *BECount) {
+ assert(SI->isSimple() && "Expected only non-volatile stores.");
+
+ Value *StorePtr = SI->getPointerOperand();
+ const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ unsigned Stride = getStoreStride(StoreEv);
+ unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+ bool NegStride = StoreSize == -Stride;
// The store must be feeding a non-volatile load.
- LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
- if (!LI || !LI->isSimple())
- return false;
+ LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+ assert(LI->isSimple() && "Expected only non-volatile loads.");
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load. If we have something else, it's a
// random load we can't handle.
const SCEVAddRecExpr *LoadEv =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
- if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
- return false;
-
- // The store and load must share the same stride.
- if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
- return false;
+ cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
// The trip count of the loop and the base pointer of the addrec SCEV is
// guaranteed to be loop invariant, which means that it should dominate the
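
Illustrative example (not part of the patch): the two store idioms that are
now collected into separate lists:

    #include <cstddef>

    void zeroFill(int *A, size_t N) {
      for (size_t I = 0; I != N; ++I)
        A[I] = 0;       // splat store -> memset (StoreRefsForMemset)
    }

    void blockCopy(int *A, const int *B, size_t N) {
      for (size_t I = 0; I != N; ++I)
        A[I] = B[I];    // load feeding a same-stride store -> memcpy
    }                   // (StoreRefsForMemcpy)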
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 0333bf2..7354016 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -481,6 +481,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
return AMemSet;
}
+static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
+ const LoadInst *LI) {
+ unsigned StoreAlign = SI->getAlignment();
+ if (!StoreAlign)
+ StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
+ unsigned LoadAlign = LI->getAlignment();
+ if (!LoadAlign)
+ LoadAlign = DL.getABITypeAlignment(LI->getType());
+
+ return std::min(StoreAlign, LoadAlign);
+}
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
@@ -496,12 +507,84 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
const DataLayout &DL = SI->getModule()->getDataLayout();
- // Detect cases where we're performing call slot forwarding, but
- // happen to be using a load-store pair to implement it, rather than
- // a memcpy.
+ // Load to store forwarding can be interpreted as memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
+
+ auto *T = LI->getType();
+ if (T->isAggregateType()) {
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ MemoryLocation LoadLoc = MemoryLocation::get(LI);
+
+ // We use alias analysis to check if an instruction may store to
+ // the memory we load from in between the load and the store. If
+ // such an instruction is found, we try to promote there instead
+ // of at the store position.
+ Instruction *P = SI;
+ for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator();
+ I != E; ++I) {
+ if (!(AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod))
+ continue;
+
+ // We found an instruction that may write to the loaded memory.
+ // We can try to promote at this position instead of the store
+ // position if nothing aliases the store memory after this.
+ P = &*I;
+ for (; I != E; ++I) {
+ MemoryLocation StoreLoc = MemoryLocation::get(SI);
+ if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
+ DEBUG(dbgs() << "Alias " << *I << "\n");
+ P = nullptr;
+ break;
+ }
+ }
+
+ break;
+ }
+
+ // If a valid insertion position is found, then we can promote
+ // the load/store pair to a memcpy.
+ if (P) {
+ // If we load from memory that may alias the memory we store to,
+ // memmove must be used to preserve semantics. If not, memcpy can
+ // be used.
+ bool UseMemMove = false;
+ if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
+ UseMemMove = true;
+
+ unsigned Align = findCommonAlignment(DL, SI, LI);
+ uint64_t Size = DL.getTypeStoreSize(T);
+
+ IRBuilder<> Builder(P);
+ Instruction *M;
+ if (UseMemMove)
+ M = Builder.CreateMemMove(SI->getPointerOperand(),
+ LI->getPointerOperand(), Size,
+ Align, SI->isVolatile());
+ else
+ M = Builder.CreateMemCpy(SI->getPointerOperand(),
+ LI->getPointerOperand(), Size,
+ Align, SI->isVolatile());
+
+ DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI
+ << " => " << *M << "\n");
+
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ MD->removeInstruction(LI);
+ LI->eraseFromParent();
+ ++NumMemCpyInstr;
+
+ // Make sure we do not invalidate the iterator.
+ BBI = M->getIterator();
+ return true;
+ }
+ }
+
+ // Detect cases where we're performing call slot forwarding, but
+ // happen to be using a load-store pair to implement it, rather than
+ // a memcpy.
MemDepResult ldep = MD->getDependency(LI);
CallInst *C = nullptr;
if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
@@ -522,18 +605,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
}
if (C) {
- unsigned storeAlign = SI->getAlignment();
- if (!storeAlign)
- storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
- unsigned loadAlign = LI->getAlignment();
- if (!loadAlign)
- loadAlign = DL.getABITypeAlignment(LI->getType());
-
bool changed = performCallSlotOptzn(
LI, SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
DL.getTypeStoreSize(SI->getOperand(0)->getType()),
- std::min(storeAlign, loadAlign), C);
+ findCommonAlignment(DL, SI, LI), C);
if (changed) {
MD->removeInstruction(SI);
SI->eraseFromParent();
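
Illustrative example (not part of the patch): an aggregate load/store pair
the new code can promote. If the two locations may alias, memmove is
emitted instead of memcpy:

    struct Blob { char Bytes[128]; };

    void assign(Blob *Dst, const Blob *Src) {
      *Dst = *Src;   // aggregate load + store -> memcpy (or memmove)
    }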
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index fb970c7..401a740 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -183,6 +183,8 @@ namespace {
Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
Value *RemoveFactorFromExpression(Value *V, Value *Factor);
void EraseInst(Instruction *I);
+ void RecursivelyEraseDeadInsts(Instruction *I,
+ SetVector<AssertingVH<Instruction>> &Insts);
void OptimizeInst(Instruction *I);
Instruction *canonicalizeNegConstExpr(Instruction *I);
};
@@ -1926,6 +1928,22 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
return nullptr;
}
+// Remove dead instructions, and if any operands are trivially dead, add them
+// to Insts so they will be removed as well.
+void Reassociate::RecursivelyEraseDeadInsts(
+ Instruction *I, SetVector<AssertingVH<Instruction>> &Insts) {
+ assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
+ SmallVector<Value *, 4> Ops(I->op_begin(), I->op_end());
+ ValueRankMap.erase(I);
+ Insts.remove(I);
+ RedoInsts.remove(I);
+ I->eraseFromParent();
+ for (auto Op : Ops)
+ if (Instruction *OpInst = dyn_cast<Instruction>(Op))
+ if (OpInst->use_empty())
+ Insts.insert(OpInst);
+}
+
/// Zap the given instruction, adding interesting operands to the work list.
void Reassociate::EraseInst(Instruction *I) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
@@ -2255,7 +2273,21 @@ bool Reassociate::runOnFunction(Function &F) {
++II;
}
- // If this produced extra instructions to optimize, handle them now.
+ // Make a copy of all the instructions to be redone so we can remove dead
+ // instructions.
+ SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
+ // Iterate over all instructions to be reevaluated and remove trivially dead
+ // instructions. If any operand of the trivially dead instruction becomes
+ // dead mark it for deletion as well. Continue this process until all
+ // trivially dead instructions have been removed.
+ while (!ToRedo.empty()) {
+ Instruction *I = ToRedo.pop_back_val();
+ if (isInstructionTriviallyDead(I))
+ RecursivelyEraseDeadInsts(I, ToRedo);
+ }
+
+ // Now that we have removed dead instructions, we can reoptimize the
+ // remaining instructions.
while (!RedoInsts.empty()) {
Instruction *I = RedoInsts.pop_back_val();
if (isInstructionTriviallyDead(I))
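
A minimal sketch of the same worklist pattern on a toy DAG (the Node type is
hypothetical, not LLVM's API): erasing one dead node may make its operands
dead, so they are queued until the worklist drains:

    #include <vector>

    struct Node {
      std::vector<Node *> Operands;
      int UseCount = 0;
      bool Erased = false;
    };

    void eraseDeadNodes(std::vector<Node *> &Worklist) {
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
        if (N->Erased || N->UseCount != 0)
          continue;                    // only trivially dead nodes
        N->Erased = true;
        for (Node *Op : N->Operands)
          if (--Op->UseCount == 0)     // operand just became dead
            Worklist.push_back(Op);
      }
    }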
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index db127c3..5d253be 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -428,30 +428,15 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// We should have never reached here if this argument isn't an gc value
return BaseDefiningValueResult(I, true);
- if (isa<GlobalVariable>(I))
- // base case
+ if (isa<Constant>(I))
+ // We assume that objects with a constant base (e.g. a global) can't move
+ // and don't need to be reported to the collector because they are always
+ // live. All constants have constant bases. Besides global references, all
+ // kinds of constants (e.g. undef, constant expressions, null pointers) can
+ // be introduced by the inliner or the optimizer, especially on dynamically
+ // dead paths. See e.g. test4 in constants.ll.
return BaseDefiningValueResult(I, true);
- // inlining could possibly introduce phi node that contains
- // undef if callee has multiple returns
- if (isa<UndefValue>(I))
- // utterly meaningless, but useful for dealing with
- // partially optimized code.
- return BaseDefiningValueResult(I, true);
-
- // Due to inheritance, this must be _after_ the global variable and undef
- // checks
- if (isa<Constant>(I)) {
- assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
- "order of checks wrong!");
- // Note: Even for frontends which don't have constant references, we can
- // see constants appearing after optimizations. A simple example is
- // specialization of an address computation on null feeding into a merge
- // point where the actual use of the now-constant input is protected by
- // another null check. (e.g. test4 in constants.ll)
- return BaseDefiningValueResult(I, true);
- }
-
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Def = CI->stripPointerCasts();
// If stripping pointer casts changes the address space there is an
@@ -1642,33 +1627,24 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
DenseSet<Value *> &VisitedLiveValues) {
for (User *U : GCRelocs) {
- if (!isa<IntrinsicInst>(U))
+ GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
+ if (!Relocate)
continue;
- IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U);
-
- // We only care about relocates
- if (RelocatedValue->getIntrinsicID() !=
- Intrinsic::experimental_gc_relocate) {
- continue;
- }
-
- GCRelocateOperands RelocateOperands(RelocatedValue);
- Value *OriginalValue =
- const_cast<Value *>(RelocateOperands.getDerivedPtr());
+ Value *OriginalValue = const_cast<Value *>(Relocate->getDerivedPtr());
assert(AllocaMap.count(OriginalValue));
Value *Alloca = AllocaMap[OriginalValue];
// Emit store into the related alloca
// All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to
// the correct type according to alloca.
- assert(RelocatedValue->getNextNode() &&
+ assert(Relocate->getNextNode() &&
"Should always have one since it's not a terminator");
- IRBuilder<> Builder(RelocatedValue->getNextNode());
+ IRBuilder<> Builder(Relocate->getNextNode());
Value *CastedRelocatedValue =
- Builder.CreateBitCast(RelocatedValue,
+ Builder.CreateBitCast(Relocate,
cast<AllocaInst>(Alloca)->getAllocatedType(),
- suffixed_name_or(RelocatedValue, ".casted", ""));
+ suffixed_name_or(Relocate, ".casted", ""));
StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
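
Illustrative example (not part of the patch, a source-level analogy):
constant bases never relocate, so they need no entry in the statepoint's
live set even when they merge with non-constant pointers:

    extern int GlobalObj;              // global: constant base

    int *pick(bool C, int *Heap) {
      return C ? &GlobalObj : Heap;    // phi of constant and derived bases
    }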
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 0914699..42287d3 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -74,17 +74,13 @@ namespace llvm {
// insertFastDiv - Substitutes the div/rem instruction with code that checks the
// value of the operands and uses a shorter-faster div/rem instruction when
// possible and the longer-slower div/rem instruction otherwise.
-static bool insertFastDiv(Function &F,
- Function::iterator &I,
- BasicBlock::iterator &J,
- IntegerType *BypassType,
- bool UseDivOp,
- bool UseSignedOp,
+static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
+ bool UseDivOp, bool UseSignedOp,
DivCacheTy &PerBBDivCache) {
+ Function *F = I->getParent()->getParent();
// Get instruction operands
- Instruction *Instr = &*J;
- Value *Dividend = Instr->getOperand(0);
- Value *Divisor = Instr->getOperand(1);
+ Value *Dividend = I->getOperand(0);
+ Value *Divisor = I->getOperand(1);
if (isa<ConstantInt>(Divisor) ||
(isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
@@ -94,13 +90,12 @@ static bool insertFastDiv(Function &F,
}
// Basic Block is split before divide
- BasicBlock *MainBB = &*I;
- BasicBlock *SuccessorBB = I->splitBasicBlock(J);
- ++I; //advance iterator I to successorBB
+ BasicBlock *MainBB = I->getParent();
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
// Add new basic block for slow divide operation
- BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
- MainBB->getParent(), SuccessorBB);
+ BasicBlock *SlowBB =
+ BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
SlowBB->moveBefore(SuccessorBB);
IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
Value *SlowQuotientV;
@@ -115,8 +110,8 @@ static bool insertFastDiv(Function &F,
SlowBuilder.CreateBr(SuccessorBB);
// Add new basic block for fast divide operation
- BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
- MainBB->getParent(), SuccessorBB);
+ BasicBlock *FastBB =
+ BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
FastBB->moveBefore(SlowBB);
IRBuilder<> FastBuilder(FastBB, FastBB->begin());
Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
@@ -139,19 +134,19 @@ static bool insertFastDiv(Function &F,
// Phi nodes for result of div and rem
IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
- PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+ PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
QuoPhi->addIncoming(SlowQuotientV, SlowBB);
QuoPhi->addIncoming(FastQuotientV, FastBB);
- PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+ PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
RemPhi->addIncoming(SlowRemainderV, SlowBB);
RemPhi->addIncoming(FastRemainderV, FastBB);
- // Replace Instr with appropriate phi node
+ // Replace I with appropriate phi node
if (UseDivOp)
- Instr->replaceAllUsesWith(QuoPhi);
+ I->replaceAllUsesWith(QuoPhi);
else
- Instr->replaceAllUsesWith(RemPhi);
- Instr->eraseFromParent();
+ I->replaceAllUsesWith(RemPhi);
+ I->eraseFromParent();
// Combine operands into a single value with OR for value testing below
MainBB->getInstList().back().eraseFromParent();
@@ -168,9 +163,6 @@ static bool insertFastDiv(Function &F,
Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
- // point iterator J at first instruction of successorBB
- J = I->begin();
-
// Cache phi nodes to be used later in place of other instances
// of div or rem with the same sign, dividend, and divisor
DivOpInfo Key(UseSignedOp, Dividend, Divisor);
@@ -179,57 +171,54 @@ static bool insertFastDiv(Function &F,
return true;
}
-// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
-// operands and operation are identical. Otherwise call insertFastDiv to perform
-// the optimization and cache the resulting dividend and remainder.
-static bool reuseOrInsertFastDiv(Function &F,
- Function::iterator &I,
- BasicBlock::iterator &J,
- IntegerType *BypassType,
- bool UseDivOp,
- bool UseSignedOp,
+// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
+// the current BB if operands and operation are identical. Otherwise calls
+// insertFastDiv to perform the optimization and caches the resulting dividend
+// and remainder.
+static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
+ bool UseDivOp, bool UseSignedOp,
DivCacheTy &PerBBDivCache) {
// Get instruction operands
- Instruction *Instr = &*J;
- DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
+ DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
if (CacheI == PerBBDivCache.end()) {
// If previous instance does not exist, insert fast div
- return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
- PerBBDivCache);
+ return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
}
// Replace operation value with previously generated phi node
DivPhiNodes &Value = CacheI->second;
if (UseDivOp) {
// Replace all uses of div instruction with quotient phi node
- J->replaceAllUsesWith(Value.Quotient);
+ I->replaceAllUsesWith(Value.Quotient);
} else {
// Replace all uses of rem instruction with remainder phi node
- J->replaceAllUsesWith(Value.Remainder);
+ I->replaceAllUsesWith(Value.Remainder);
}
- // Advance to next operation
- ++J;
-
// Remove redundant operation
- Instr->eraseFromParent();
+ I->eraseFromParent();
return true;
}
-// bypassSlowDivision - This optimization identifies DIV instructions that can
-// be profitably bypassed and carried out with a shorter, faster divide.
-bool llvm::bypassSlowDivision(Function &F,
- Function::iterator &I,
- const DenseMap<unsigned int, unsigned int> &BypassWidths) {
+// bypassSlowDivision - This optimization identifies DIV instructions in a BB
+// that can be profitably bypassed and carried out with a shorter, faster
+// divide.
+bool llvm::bypassSlowDivision(
+ BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
DivCacheTy DivCache;
bool MadeChange = false;
- for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
+ Instruction *Next = &*BB->begin();
+ while (Next != nullptr) {
+ // We may add instructions immediately after I, but we want to skip over
+ // them.
+ Instruction *I = Next;
+ Next = Next->getNextNode();
// Get instruction details
- unsigned Opcode = J->getOpcode();
+ unsigned Opcode = I->getOpcode();
bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
bool UseSignedOp = Opcode == Instruction::SDiv ||
@@ -240,11 +229,11 @@ bool llvm::bypassSlowDivision(Function &F,
continue;
// Skip division on vector types, only optimize integer instructions
- if (!J->getType()->isIntegerTy())
+ if (!I->getType()->isIntegerTy())
continue;
// Get bitwidth of div/rem instruction
- IntegerType *T = cast<IntegerType>(J->getType());
+ IntegerType *T = cast<IntegerType>(I->getType());
unsigned int bitwidth = T->getBitWidth();
// Continue if bitwidth is not bypassed
@@ -253,10 +242,9 @@ bool llvm::bypassSlowDivision(Function &F,
continue;
// Get type for div/rem instruction with bypass bitwidth
- IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
+ IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
- MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
- UseSignedOp, DivCache);
+ MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
}
return MadeChange;
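
Illustrative example (not part of the patch): what the emitted control flow
computes, at the source level, for an unsigned 64-bit divide with a 32-bit
bypass width (the signed case also has to account for sign bits):

    #include <cstdint>

    uint64_t bypassedDiv(uint64_t A, uint64_t B) {
      if (((A | B) >> 32) == 0)             // fast path: both fit in 32 bits
        return uint32_t(A) / uint32_t(B);   // short, fast divide
      return A / B;                         // slow path: full-width divide
    }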
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index e75163f..0e386ac 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1305,8 +1305,9 @@ static bool markAliveBlocks(Function &F,
}
}
- // Turn invokes that call 'nounwind' functions into ordinary calls.
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ TerminatorInst *Terminator = BB->getTerminator();
+ if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
Value *Callee = II->getCalledValue();
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
changeToUnreachable(II, true);
@@ -1321,6 +1322,44 @@ static bool markAliveBlocks(Function &F,
changeToCall(II);
Changed = true;
}
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
+ // Remove catchpads which cannot be reached.
+ struct CatchPadDenseMapInfo {
+ static CatchPadInst *getEmptyKey() {
+ return DenseMapInfo<CatchPadInst *>::getEmptyKey();
+ }
+ static CatchPadInst *getTombstoneKey() {
+ return DenseMapInfo<CatchPadInst *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CatchPadInst *CatchPad) {
+ return static_cast<unsigned>(hash_combine_range(
+ CatchPad->value_op_begin(), CatchPad->value_op_end()));
+ }
+ static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) {
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+ };
+
+ // Set of unique CatchPads.
+ SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4,
+ CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>>
+ HandlerSet;
+ detail::DenseSetEmpty Empty;
+ for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(),
+ E = CatchSwitch->handler_end();
+ I != E; ++I) {
+ BasicBlock *HandlerBB = *I;
+ auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI());
+ if (!HandlerSet.insert({CatchPad, Empty}).second) {
+ CatchSwitch->removeHandler(I);
+ --I;
+ --E;
+ Changed = true;
+ }
+ }
}
Changed |= ConstantFoldTerminator(BB, true);
@@ -1514,8 +1553,8 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
return true;
// Check if the function is specifically marked as a gc leaf function.
- //
- // TODO: we should be checking the attributes on the call site as well.
+ if (CS.hasFnAttr("gc-leaf-function"))
+ return true;
if (const Function *F = CS.getCalledFunction())
return F->hasFnAttribute("gc-leaf-function");
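
A minimal sketch of the dedup-by-structural-identity idea above, using the
standard library instead of DenseMap (the Handler type is hypothetical):

    #include <cstddef>
    #include <unordered_set>

    struct Handler { int TypeId; };

    struct HandlerHash {
      std::size_t operator()(const Handler &H) const {
        return static_cast<std::size_t>(H.TypeId);   // stands in for getHashValue
      }
    };

    struct HandlerEq {
      bool operator()(const Handler &A, const Handler &B) const {
        return A.TypeId == B.TypeId;                 // stands in for isIdenticalTo
      }
    };

    // Returns false for a duplicate handler, which is then removed.
    bool insertUnique(std::unordered_set<Handler, HandlerHash, HandlerEq> &Set,
                      Handler H) {
      return Set.insert(H).second;
    }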
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index d0932f83..3bb3fa5 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -3448,18 +3449,26 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
if (BBI->mayHaveSideEffects()) {
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (auto *SI = dyn_cast<StoreInst>(BBI)) {
if (SI->isVolatile())
break;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ } else if (auto *LI = dyn_cast<LoadInst>(BBI)) {
if (LI->isVolatile())
break;
- } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
+ } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
if (RMWI->isVolatile())
break;
- } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
+ } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
if (CXI->isVolatile())
break;
+ } else if (isa<CatchPadInst>(BBI)) {
+ // A catchpad may invoke exception object constructors and such, which
+ // in some languages can be arbitrary code, so be conservative by
+ // default.
+ // For CoreCLR, it just involves a type test, so it can be removed.
+ if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) !=
+ EHPersonality::CoreCLR)
+ break;
} else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
!isa<LandingPadInst>(BBI)) {
break;
@@ -3485,7 +3494,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
TerminatorInst *TI = Preds[i]->getTerminator();
IRBuilder<> Builder(TI);
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional()) {
if (BI->getSuccessor(0) == BB) {
new UnreachableInst(TI->getContext(), TI);
@@ -3502,7 +3511,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
Changed = true;
}
}
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i)
if (i.getCaseSuccessor() == BB) {
@@ -3511,18 +3520,49 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
--i; --e;
Changed = true;
}
- } else if ((isa<InvokeInst>(TI) &&
- cast<InvokeInst>(TI)->getUnwindDest() == BB) ||
- isa<CatchSwitchInst>(TI)) {
- removeUnwindEdge(TI->getParent());
- Changed = true;
+ } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ removeUnwindEdge(TI->getParent());
+ Changed = true;
+ }
+ } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
+ if (CSI->getUnwindDest() == BB) {
+ removeUnwindEdge(TI->getParent());
+ Changed = true;
+ continue;
+ }
+
+ for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
+ E = CSI->handler_end();
+ I != E; ++I) {
+ if (*I == BB) {
+ CSI->removeHandler(I);
+ --I;
+ --E;
+ Changed = true;
+ }
+ }
+ if (CSI->getNumHandlers() == 0) {
+ BasicBlock *CatchSwitchBB = CSI->getParent();
+ if (CSI->hasUnwindDest()) {
+ // Redirect preds to the unwind dest
+ CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest());
+ } else {
+ // Rewrite all preds to unwind to caller (or from invoke to call).
+ SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB));
+ for (BasicBlock *EHPred : EHPreds)
+ removeUnwindEdge(EHPred);
+ }
+ // The catchswitch is no longer reachable.
+ new UnreachableInst(CSI->getContext(), CSI);
+ CSI->eraseFromParent();
+ Changed = true;
+ }
} else if (isa<CleanupReturnInst>(TI)) {
new UnreachableInst(TI->getContext(), TI);
TI->eraseFromParent();
Changed = true;
}
- // TODO: We can remove a catchswitch if all it's catchpads end in
- // unreachable.
}
// If this block is now dead, remove it.
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 81dea6d..dc5fee5 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -57,8 +57,7 @@ static bool ignoreCallingConv(LibFunc::Func Func) {
Func == LibFunc::llabs || Func == LibFunc::strlen;
}
-/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
+/// Return true if it only matters that the value is equal or not-equal to zero.
static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
for (User *U : V->users()) {
if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -72,8 +71,7 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
return true;
}
-/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality
-/// comparisons with With.
+/// Return true if it is only used in equality comparisons with With.
static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
for (User *U : V->users()) {
if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -249,12 +247,12 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
!FT->getParamType(2)->isIntegerTy())
return nullptr;
- // Extract some information from the instruction
+ // Extract some information from the instruction.
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
uint64_t Len;
- // We don't do anything if length is not constant
+ // We don't do anything if length is not constant.
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Len = LengthArg->getZExtValue();
else
@@ -272,12 +270,12 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
if (SrcLen == 0 || Len == 0)
return Dst;
- // We don't optimize this case
+ // We don't optimize this case.
if (Len < SrcLen)
return nullptr;
// strncat(x, s, c) -> strcat(x, s)
- // s is constant so the strcat can be optimized further
+ // s is constant so the strcat can be optimized further.
return emitStrLenMemCpy(Src, Dst, SrcLen, B);
}
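
The guarded strncat fold is easiest to see at the source level. A small self-contained C++ check (illustration only, not from the patch) of why c >= strlen(s) makes strncat(x, s, c) behave exactly like strcat(x, s):

#include <cassert>
#include <cstring>

int main() {
  char a[32] = "ab", b[32] = "ab";
  std::strncat(a, "cde", 8); // 8 >= strlen("cde") == 3, so no truncation
  std::strcat(b, "cde");
  assert(std::strcmp(a, b) == 0); // identical results
  return 0;
}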
@@ -310,7 +308,8 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI),
+ "strchr");
return nullptr;
}
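
The reformatted line implements strchr(p, 0) -> p + strlen(p). A short standalone check of that identity (illustration only):

#include <cassert>
#include <cstring>

int main() {
  const char *p = "hello";
  // strchr with a NUL argument finds the terminator, i.e. p + strlen(p).
  assert(std::strchr(p, 0) == p + std::strlen(p));
  return 0;
}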
@@ -490,8 +489,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Type *PT = Callee->getFunctionType()->getParamType(0);
Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
- Value *DstEnd =
- B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
+ Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst,
+ ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -599,7 +598,8 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
if (I == StringRef::npos) // No match.
return Constant::getNullValue(CI->getType());
- return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I),
+ "strpbrk");
}
// strpbrk(s, "a") -> strchr(s, 'a')
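
With both arguments constant, strpbrk folds to a fixed offset into the first string (or a null result). A standalone check (illustration only):

#include <cassert>
#include <cstring>

int main() {
  const char *s = "status";
  assert(std::strpbrk(s, "ut") == s + 1);    // 't' at index 1 is the first hit
  assert(std::strpbrk(s, "xyz") == nullptr); // no match -> null result
  return 0;
}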
@@ -878,8 +878,10 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
Type *RHSPtrTy =
IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
- Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
- Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+ Value *LHSV =
+ B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
+ Value *RHSV =
+ B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
}
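
The fold replaces a known-length memcmp, used only in zero-equality tests, with one wide load and compare per side. A standalone check that the integer compare agrees with memcmp's equality answer (illustration only; the pass additionally requires the width to be a legal integer type for the target):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t a = 0x01020304, b = 0x01020304;
  assert((std::memcmp(&a, &b, sizeof a) == 0) == (a == b));
  b = 0x01020305;
  assert((std::memcmp(&a, &b, sizeof a) != 0) == (a != b));
  return 0;
}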
@@ -992,6 +994,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
if (V == nullptr)
return nullptr;
+
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.SetFastMathFlags(CI->getFastMathFlags());
// floor((double)floatval) -> (double)floorf(floatval)
if (Callee->isIntrinsic()) {
@@ -1027,6 +1033,10 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
if (V2 == nullptr)
return nullptr;
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.SetFastMathFlags(CI->getFastMathFlags());
+
// fmin((double)floatval1, (double)floatval2)
// -> (double)fminf(floatval1, floatval2)
// TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP().
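
Both hunks use the same RAII idiom to scope the copied flags. A minimal sketch of the pattern (hypothetical function name; the guard and setter are the ones used in the hunks):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static void withCallFMF(CallInst *CI, IRBuilder<> &B) {
  // Snapshot the builder's fast-math flags; restored on scope exit.
  IRBuilder<>::FastMathFlagGuard Guard(B);
  B.SetFastMathFlags(CI->getFastMathFlags());
  // Instructions created here (e.g. the float-variant call) inherit
  // CI's flags instead of whatever the builder happened to carry.
} // Guard's destructor restores the previous flags.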
@@ -1117,7 +1127,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Callee->getAttributes());
}
- bool unsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
+ bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
// pow(exp(x), y) -> exp(x*y)
// pow(exp2(x), y) -> exp2(x * y)
@@ -1126,7 +1136,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// underflow behavior quite dramatically.
// Example: x = 1000, y = 0.001.
// pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
- if (unsafeFPMath) {
+ if (UnsafeFPMath) {
if (auto *OpC = dyn_cast<CallInst>(Op1)) {
IRBuilder<>::FastMathFlagGuard Guard(B);
FastMathFlags FMF;
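
The comment's example is easy to verify numerically. A standalone check (illustration only) that the intermediate exp(x) overflows while exp(x*y) stays finite:

#include <cassert>
#include <cmath>

int main() {
  double x = 1000.0, y = 0.001;
  assert(std::isinf(std::pow(std::exp(x), y)));               // exp(1000) -> +inf
  assert(std::fabs(std::exp(x * y) - std::exp(1.0)) < 1e-12); // exp(1), finite
  return 0;
}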
@@ -1157,7 +1167,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
LibFunc::fabsl)) {
// In -ffast-math, pow(x, 0.5) -> sqrt(x).
- if (unsafeFPMath)
+ if (UnsafeFPMath)
return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
Callee->getAttributes());
@@ -1183,7 +1193,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
// In -ffast-math, generate repeated fmul instead of generating pow(x, n).
- if (unsafeFPMath) {
+ if (UnsafeFPMath) {
APFloat V = abs(Op2C->getValueAPF());
// We limit to a max of 7 fmul(s). Thus max exponent is 32.
// This transformation applies to integer exponents only.
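
A standalone check of the expansion's shape for pow(x, 5) (illustration only; repeated fmuls can differ from libm in the last ulp, which is why the expansion is gated on fast math):

#include <cassert>
#include <cmath>

int main() {
  double x = 1.5;
  double x2 = x * x, x4 = x2 * x2; // two fmuls
  double x5 = x4 * x;              // third fmul: pow(x, 5)
  assert(std::fabs(x5 - std::pow(x, 5.0)) < 1e-12);
  return 0;
}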
@@ -1291,12 +1301,9 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
// function, do that first.
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
- if ((Name == "fmin" && hasFloatVersion(Name)) ||
- (Name == "fmax" && hasFloatVersion(Name))) {
- Value *Ret = optimizeBinaryDoubleFP(CI, B);
- if (Ret)
+ if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
+ if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
return Ret;
- }
// Make sure this has 2 arguments of FP type which match the result type.
FunctionType *FT = Callee->getFunctionType();
@@ -1307,14 +1314,12 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
IRBuilder<>::FastMathFlagGuard Guard(B);
FastMathFlags FMF;
- Function *F = CI->getParent()->getParent();
- if (canUseUnsafeFPMath(F)) {
+ if (CI->hasUnsafeAlgebra()) {
// Unsafe algebra sets all fast-math-flags to true.
FMF.setUnsafeAlgebra();
} else {
// At a minimum, no-nans-fp-math must be true.
- Attribute Attr = F->getFnAttribute("no-nans-fp-math");
- if (Attr.getValueAsString() != "true")
+ if (!CI->hasNoNaNs())
return nullptr;
// No-signed-zeros is implied by the definitions of fmax/fmin themselves:
// "Ideally, fmax would be sensitive to the sign of zero, for example
@@ -2169,7 +2174,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
StringRef FuncName = Callee->getName();
- IRBuilder<> Builder(CI);
+
+ SmallVector<OperandBundleDef, 2> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+ IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
// Command-line parameter overrides function attribute.
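
The three-argument IRBuilder constructor makes every call created during simplification carry the original call's operand bundles. A minimal sketch of the setup (hypothetical function name; constructor signature as used in the hunk):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static void buildPreservingBundles(CallInst *CI) {
  SmallVector<OperandBundleDef, 2> OpBundles;
  CI->getOperandBundlesAsDefs(OpBundles);
  IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
  // Replacement calls made via Builder now inherit CI's bundles
  // (e.g. "deopt" state) rather than silently dropping them.
}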
@@ -2419,7 +2427,8 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
return false;
}
-Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
+ IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk))
@@ -2433,7 +2442,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &
return nullptr;
}
-Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
+ IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk))
@@ -2447,7 +2457,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<>
return nullptr;
}
-Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
+ IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk))
@@ -2539,7 +2550,10 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
StringRef FuncName = Callee->getName();
- IRBuilder<> Builder(CI);
+
+ SmallVector<OperandBundleDef, 2> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+ IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
 // First, check that this is a known library function.
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 1add78e..2e361d3 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -218,12 +218,12 @@ static Metadata *mapMetadataOp(Metadata *Op,
}
/// Resolve uniquing cycles involving the given metadata.
-static void resolveCycles(Metadata *MD, bool MDMaterialized) {
+static void resolveCycles(Metadata *MD, bool AllowTemps) {
if (auto *N = dyn_cast_or_null<MDNode>(MD)) {
- if (!MDMaterialized && N->isTemporary())
+ if (AllowTemps && N->isTemporary())
return;
if (!N->isResolved())
- N->resolveCycles(MDMaterialized);
+ N->resolveCycles(AllowTemps);
}
}
@@ -253,7 +253,7 @@ static bool remapOperands(MDNode &Node,
// Resolve uniquing cycles underneath distinct nodes on the fly so they
// don't infect later operands.
if (IsDistinct)
- resolveCycles(New, !(Flags & RF_HaveUnmaterializedMetadata));
+ resolveCycles(New, Flags & RF_HaveUnmaterializedMetadata);
}
}
@@ -401,7 +401,7 @@ Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
return NewMD;
// Resolve cycles involving the entry metadata.
- resolveCycles(NewMD, !(Flags & RF_HaveUnmaterializedMetadata));
+ resolveCycles(NewMD, Flags & RF_HaveUnmaterializedMetadata);
// Remap the operands of distinct MDNodes.
while (!DistinctWorklist.empty())
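
The rename flips the parameter's sense, so callers now pass the RF_HaveUnmaterializedMetadata test directly instead of its negation. A standalone check that the two formulations agree (plain C++ illustration, hypothetical names):

#include <cassert>

static bool skipResolveOld(bool MDMaterialized, bool IsTemporary) {
  return !MDMaterialized && IsTemporary; // old: caller negated the flag
}
static bool skipResolveNew(bool AllowTemps, bool IsTemporary) {
  return AllowTemps && IsTemporary;      // new: flag passed through
}

int main() {
  for (bool Flag : {false, true})
    for (bool Temp : {false, true})
      assert(skipResolveOld(/*MDMaterialized=*/!Flag, Temp) ==
             skipResolveNew(/*AllowTemps=*/Flag, Temp));
  return 0;
}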
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a627dd6..2c0d317 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4294,12 +4294,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop,
- Reductions[Phi])) {
- if (Reductions[Phi].hasUnsafeAlgebra())
- Requirements->addUnsafeAlgebraInst(
- Reductions[Phi].getUnsafeAlgebraInst());
- AllowedExit.insert(Reductions[Phi].getLoopExitInstr());
+ RecurrenceDescriptor RedDes;
+ if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) {
+ if (RedDes.hasUnsafeAlgebra())
+ Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
+ AllowedExit.insert(RedDes.getLoopExitInstr());
+ Reductions[Phi] = RedDes;
continue;
}
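
The temporary RecurrenceDescriptor avoids probing the Reductions map before the check succeeds: map operator[] inserts a default-constructed value even when the subsequent test fails. A standalone illustration of that pitfall (std::map here; DenseMap behaves the same way):

#include <cassert>
#include <map>

int main() {
  std::map<int, int> M;
  bool Probe = (M[1] == 42);         // operator[] inserts {1, 0} as a side effect
  assert(!Probe && M.count(1) == 1); // the stale entry persists
  return 0;
}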