summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/LICM.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp170
1 files changed, 153 insertions, 17 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index f51d11c..37b9c4b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -77,10 +77,16 @@ STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
+/// Memory promotion is enabled by default.
static cl::opt<bool>
- DisablePromotion("disable-licm-promotion", cl::Hidden,
+ DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
cl::desc("Disable memory promotion in LICM pass"));
+static cl::opt<uint32_t> MaxNumUsesTraversed(
+ "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
+ cl::desc("Max num uses visited for identifying load "
+ "invariance in loop using invariant start (default = 8)"));
+
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo);
@@ -201,9 +207,9 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true))
return PreservedAnalyses::all();
- // FIXME: There is no setPreservesCFG in the new PM. When that becomes
- // available, it should be used here.
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
char LegacyLICMPass::ID = 0;
@@ -425,6 +431,29 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
continue;
}
+ // Attempt to remove floating point division out of the loop by converting
+ // it to a reciprocal multiplication.
+ if (I.getOpcode() == Instruction::FDiv &&
+ CurLoop->isLoopInvariant(I.getOperand(1)) &&
+ I.hasAllowReciprocal()) {
+ auto Divisor = I.getOperand(1);
+ auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
+ auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
+ ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
+ ReciprocalDivisor->insertBefore(&I);
+
+ auto Product = BinaryOperator::CreateFMul(I.getOperand(0),
+ ReciprocalDivisor);
+ Product->setFastMathFlags(I.getFastMathFlags());
+ Product->insertAfter(&I);
+ I.replaceAllUsesWith(Product);
+ I.eraseFromParent();
+
+ hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
+ Changed = true;
+ continue;
+ }
+
// Try hoisting the instruction out to the preheader. We can only do this
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
@@ -461,7 +490,10 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
// Iterate over loop instructions and compute safety info.
- for (Loop::block_iterator BB = CurLoop->block_begin(),
+ // Skip header as it has been computed and stored in HeaderMayThrow.
+ // The first block in loopinfo.Blocks is guaranteed to be the header.
+ assert(Header == *CurLoop->getBlocks().begin() && "First block must be header");
+ for (Loop::block_iterator BB = std::next(CurLoop->block_begin()),
BBE = CurLoop->block_end();
(BB != BBE) && !SafetyInfo->MayThrow; ++BB)
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
@@ -477,6 +509,59 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
SafetyInfo->BlockColors = colorEHFunclets(*Fn);
}
+// Return true if LI is invariant within scope of the loop. LI is invariant if
+// CurLoop is dominated by an invariant.start representing the same memory location
+// and size as the memory location LI loads from, and also the invariant.start
+// has no uses.
+static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
+ Loop *CurLoop) {
+ Value *Addr = LI->getOperand(0);
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
+ cast<PointerType>(Addr->getType())->getElementType());
+
+ // if the type is i8 addrspace(x)*, we know this is the type of
+ // llvm.invariant.start operand
+ auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
+ LI->getPointerAddressSpace());
+ unsigned BitcastsVisited = 0;
+ // Look through bitcasts until we reach the i8* type (this is invariant.start
+ // operand type).
+ while (Addr->getType() != PtrInt8Ty) {
+ auto *BC = dyn_cast<BitCastInst>(Addr);
+ // Avoid traversing high number of bitcast uses.
+ if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
+ return false;
+ Addr = BC->getOperand(0);
+ }
+
+ unsigned UsesVisited = 0;
+ // Traverse all uses of the load operand value, to see if invariant.start is
+ // one of the uses, and whether it dominates the load instruction.
+ for (auto *U : Addr->users()) {
+ // Avoid traversing for Load operand with high number of users.
+ if (++UsesVisited > MaxNumUsesTraversed)
+ return false;
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
+ // If there are escaping uses of invariant.start instruction, the load maybe
+ // non-invariant.
+ if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
+ !II->use_empty())
+ continue;
+ unsigned InvariantSizeInBits =
+ cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
+ // Confirm the invariant.start location size contains the load operand size
+ // in bits. Also, the invariant.start should dominate the load, and we
+ // should not hoist the load out of a loop that contains this dominating
+ // invariant.start.
+ if (LocSizeInBits <= InvariantSizeInBits &&
+ DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
+ return true;
+ }
+
+ return false;
+}
+
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
LoopSafetyInfo *SafetyInfo,
@@ -493,6 +578,10 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (LI->getMetadata(LLVMContext::MD_invariant_load))
return true;
+ // This checks for an invariant.start dominating the load.
+ if (isLoadInvariantInLoop(LI, DT, CurLoop))
+ return true;
+
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
if (LI->getType()->isSized())
@@ -782,7 +871,7 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
<< "\n");
ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I)
- << "hosting " << ore::NV("Inst", &I));
+ << "hoisting " << ore::NV("Inst", &I));
// Metadata can be dependent on conditions we are hoisting above.
// Conservatively strip all metadata on the instruction unless we were
@@ -852,6 +941,7 @@ class LoopPromoter : public LoadAndStorePromoter {
LoopInfo &LI;
DebugLoc DL;
int Alignment;
+ bool UnorderedAtomic;
AAMDNodes AATags;
Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
@@ -875,10 +965,11 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
- const AAMDNodes &AATags)
+ bool UnorderedAtomic, const AAMDNodes &AATags)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
- LI(li), DL(std::move(dl)), Alignment(alignment), AATags(AATags) {}
+ LI(li), DL(std::move(dl)), Alignment(alignment),
+ UnorderedAtomic(UnorderedAtomic),AATags(AATags) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@@ -902,6 +993,8 @@ public:
Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
Instruction *InsertPos = LoopInsertPts[i];
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
+ if (UnorderedAtomic)
+ NewSI->setOrdering(AtomicOrdering::Unordered);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
if (AATags)
@@ -992,18 +1085,41 @@ bool llvm::promoteLoopAccessesToScalars(
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
unsigned Alignment = 1;
+ // Keep track of which types of access we see
+ bool SawUnorderedAtomic = false;
+ bool SawNotAtomic = false;
AAMDNodes AATags;
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
+ // Do we know this object does not escape ?
+ bool IsKnownNonEscapingObject = false;
if (SafetyInfo->MayThrow) {
// If a loop can throw, we have to insert a store along each unwind edge.
// That said, we can't actually make the unwind edge explicit. Therefore,
// we have to prove that the store is dead along the unwind edge.
//
- // Currently, this code just special-cases alloca instructions.
- if (!isa<AllocaInst>(GetUnderlyingObject(SomePtr, MDL)))
- return false;
+ // If the underlying object is not an alloca, nor a pointer that does not
+ // escape, then we can not effectively prove that the store is dead along
+ // the unwind edge. i.e. the caller of this function could have ways to
+ // access the pointed object.
+ Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ // If this is a base pointer we do not understand, simply bail.
+ // We only handle alloca and return value from alloc-like fn right now.
+ if (!isa<AllocaInst>(Object)) {
+ if (!isAllocLikeFn(Object, TLI))
+ return false;
+ // If this is an alloc like fn. There are more constraints we need to verify.
+ // More specifically, we must make sure that the pointer can not escape.
+ //
+ // NOTE: PointerMayBeCaptured is not enough as the pointer may have escaped
+ // even though its not captured by the enclosing function. Standard allocation
+ // functions like malloc, calloc, and operator new return values which can
+ // be assumed not to have previously escaped.
+ if (PointerMayBeCaptured(Object, true, true))
+ return false;
+ IsKnownNonEscapingObject = true;
+ }
}
// Check that all of the pointers in the alias set have the same type. We
@@ -1029,8 +1145,11 @@ bool llvm::promoteLoopAccessesToScalars(
// it.
if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
assert(!Load->isVolatile() && "AST broken");
- if (!Load->isSimple())
+ if (!Load->isUnordered())
return false;
+
+ SawUnorderedAtomic |= Load->isAtomic();
+ SawNotAtomic |= !Load->isAtomic();
if (!DereferenceableInPH)
DereferenceableInPH = isSafeToExecuteUnconditionally(
@@ -1041,9 +1160,12 @@ bool llvm::promoteLoopAccessesToScalars(
if (UI->getOperand(1) != ASIV)
continue;
assert(!Store->isVolatile() && "AST broken");
- if (!Store->isSimple())
+ if (!Store->isUnordered())
return false;
+ SawUnorderedAtomic |= Store->isAtomic();
+ SawNotAtomic |= !Store->isAtomic();
+
// If the store is guaranteed to execute, both properties are satisfied.
// We may want to check if a store is guaranteed to execute even if we
// already know that promotion is safe, since it may have higher
@@ -1096,6 +1218,12 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
+ // If we found both an unordered atomic instruction and a non-atomic memory
+ // access, bail. We can't blindly promote non-atomic to atomic since we
+ // might not be able to lower the result. We can't downgrade since that
+ // would violate memory model. Also, align 0 is an error for atomics.
+ if (SawUnorderedAtomic && SawNotAtomic)
+ return false;
// If we couldn't prove we can hoist the load, bail.
if (!DereferenceableInPH)
@@ -1106,10 +1234,15 @@ bool llvm::promoteLoopAccessesToScalars(
// stores along paths which originally didn't have them without violating the
// memory model.
if (!SafeToInsertStore) {
- Value *Object = GetUnderlyingObject(SomePtr, MDL);
- SafeToInsertStore =
- (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
+ // If this is a known non-escaping object, it is safe to insert the stores.
+ if (IsKnownNonEscapingObject)
+ SafeToInsertStore = true;
+ else {
+ Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ SafeToInsertStore =
+ (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
!PointerMayBeCaptured(Object, true, true);
+ }
}
// If we've still failed to prove we can sink the store, give up.
@@ -1134,12 +1267,15 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
+ InsertPts, PIC, *CurAST, *LI, DL, Alignment,
+ SawUnorderedAtomic, AATags);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad = new LoadInst(
SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
+ if (SawUnorderedAtomic)
+ PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DL);
if (AATags)
OpenPOWER on IntegriCloud