Diffstat (limited to 'contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 265
1 file changed, 160 insertions(+), 105 deletions(-)
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index c8007a5..ede4041 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,9 +18,11 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -38,6 +40,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -111,6 +114,10 @@ static cl::opt<bool> StressExtLdPromotion(
cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
"optimization in CodeGenPrepare"));
+static cl::opt<bool> DisablePreheaderProtect(
+ "disable-preheader-prot", cl::Hidden, cl::init(false),
+ cl::desc("Disable protection against removing loop preheaders"));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
@@ -122,6 +129,7 @@ class TypePromotionTransaction;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
+ const LoopInfo *LI;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@@ -158,9 +166,10 @@ class TypePromotionTransaction;
const char *getPassName() const override { return "CodeGen Prepare"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<DominatorTreeWrapperPass>();
+ // FIXME: When we can selectively preserve passes, preserve the domtree.
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
}
private:
@@ -203,7 +212,7 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
}
bool CodeGenPrepare::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
DL = &F.getParent()->getDataLayout();
@@ -218,6 +227,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
OptSize = F.optForSize();
/// This optimization identifies DIV instructions that can be
@@ -359,6 +369,15 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
/// edges in ways that are non-optimal for isel. Start by eliminating these
/// blocks so we can split them the way we want them.
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
+ SmallPtrSet<BasicBlock *, 16> Preheaders;
+ SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
+ while (!LoopList.empty()) {
+ Loop *L = LoopList.pop_back_val();
+ LoopList.insert(LoopList.end(), L->begin(), L->end());
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ Preheaders.insert(Preheader);
+ }
+
bool MadeChange = false;
// Note that this intentionally skips the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
@@ -391,6 +410,14 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
if (!canMergeBlocks(BB, DestBB))
continue;
+ // Do not delete loop preheaders if doing so would create a critical edge.
+ // Loop preheaders can be good locations to spill registers. If the
+ // preheader is deleted and we create a critical edge, registers may be
+ // spilled in the loop body instead.
+ if (!DisablePreheaderProtect && Preheaders.count(BB) &&
+ !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor()))
+ continue;
+
eliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
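
The guard added above has two parts: every loop (including nested loops) is walked so its preheader can be recorded, and a recorded preheader is only merged away when its single predecessor has the preheader as its sole successor, since that predecessor can then take over the preheader role without a critical edge appearing. A standalone sketch of the same logic, with hypothetical helper names, looks like this:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/BasicBlock.h"
    using namespace llvm;

    // Record the preheader of every loop, visiting nested loops via a worklist
    // (the same traversal the patch performs inline).
    static void collectPreheaders(const LoopInfo &LI,
                                  SmallPtrSetImpl<BasicBlock *> &Preheaders) {
      SmallVector<Loop *, 16> Worklist(LI.begin(), LI.end());
      while (!Worklist.empty()) {
        Loop *L = Worklist.pop_back_val();
        // Queue the sub-loops so their preheaders are protected as well.
        Worklist.insert(Worklist.end(), L->begin(), L->end());
        if (BasicBlock *Preheader = L->getLoopPreheader())
          Preheaders.insert(Preheader);
      }
    }

    // Merging a preheader is still allowed when its single predecessor branches
    // only to it: that predecessor simply becomes the new preheader, so the
    // merge cannot introduce a critical edge.
    static bool preheaderMergeIsHarmless(BasicBlock *BB) {
      BasicBlock *Pred = BB->getSinglePredecessor();
      return Pred && Pred->getSingleSuccessor();
    }
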
@@ -612,7 +639,8 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
continue;
// Create a Builder and replace the target callsite with a gep
- assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator");
+ assert(RelocatedBase->getNextNode() &&
+ "Should always have one since it's not a terminator");
// Insert after RelocatedBase
IRBuilder<> Builder(RelocatedBase->getNextNode());
@@ -730,6 +758,11 @@ static bool SinkCast(CastInst *CI) {
// Preincrement use iterator so we don't invalidate it.
++UI;
+ // The first insertion point of a block containing an EH pad is after the
+ // pad. If the pad is the user, we cannot sink the cast past the pad.
+ if (User->isEHPad())
+ continue;
+
// If the block selected to receive the cast is an EH pad that does not
// allow non-PHI instructions before the terminator, we can't sink the
// cast.
@@ -854,10 +887,14 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool SinkCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
BasicBlock *DefBB = CI->getParent();
- /// Only insert a cmp in each block once.
+ // Avoid sinking soft-FP comparisons, since this can move them into a loop.
+ if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
+ return false;
+
+ // Only insert a cmp in each block once.
DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
bool MadeChange = false;
@@ -905,8 +942,8 @@ static bool SinkCmpExpression(CmpInst *CI) {
return MadeChange;
}
-static bool OptimizeCmpExpression(CmpInst *CI) {
- if (SinkCmpExpression(CI))
+static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
+ if (SinkCmpExpression(CI, TLI))
return true;
if (CombineUAddWithOverflow(CI))
@@ -1138,7 +1175,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
// %13 = icmp eq i1 %12, true
// br i1 %13, label %cond.load4, label %else5
//
-static void ScalarizeMaskedLoad(CallInst *CI) {
+static void scalarizeMaskedLoad(CallInst *CI) {
Value *Ptr = CI->getArgOperand(0);
Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
@@ -1284,7 +1321,7 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
// store i32 %8, i32* %9
// br label %else2
// . . .
-static void ScalarizeMaskedStore(CallInst *CI) {
+static void scalarizeMaskedStore(CallInst *CI) {
Value *Src = CI->getArgOperand(0);
Value *Ptr = CI->getArgOperand(1);
Value *Alignment = CI->getArgOperand(2);
@@ -1403,7 +1440,7 @@ static void ScalarizeMaskedStore(CallInst *CI) {
// . . .
// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
-static void ScalarizeMaskedGather(CallInst *CI) {
+static void scalarizeMaskedGather(CallInst *CI) {
Value *Ptrs = CI->getArgOperand(0);
Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
@@ -1538,7 +1575,7 @@ static void ScalarizeMaskedGather(CallInst *CI) {
// store i32 % Elt1, i32* % Ptr1, align 4
// br label %else2
// . . .
-static void ScalarizeMaskedScatter(CallInst *CI) {
+static void scalarizeMaskedScatter(CallInst *CI) {
Value *Src = CI->getArgOperand(0);
Value *Ptrs = CI->getArgOperand(1);
Value *Alignment = CI->getArgOperand(2);
@@ -1653,7 +1690,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Only handle legal scalar cases. Anything else requires too much work.
Type *Ty = CountZeros->getType();
unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
- if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSize())
+ if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
return false;
// The intrinsic will be sunk behind a compare against zero and branch.
@@ -1743,8 +1780,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
// forbidden.
GlobalVariable *GV;
if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
- GV->getAlignment() < PrefAlign &&
- DL->getTypeAllocSize(GV->getType()->getElementType()) >=
+ GV->getPointerAlignment(*DL) < PrefAlign &&
+ DL->getTypeAllocSize(GV->getValueType()) >=
MinSize + Offset2)
GV->setAlignment(PrefAlign);
}
@@ -1759,27 +1796,47 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
}
+ // If we have a cold call site, try to sink addressing computation into the
+ // cold block. This interacts with our handling for loads and stores to
+ // ensure that we can fold all uses of a potential addressing computation
+ // into their uses. TODO: generalize this to work over profiling data
+ if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ for (auto &Arg : CI->arg_operands()) {
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ unsigned AS = Arg->getType()->getPointerAddressSpace();
+ return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+ }
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::objectsize: {
// Lower all uses of llvm.objectsize.*
- bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+ uint64_t Size;
Type *ReturnTy = CI->getType();
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-
+ Constant *RetVal = nullptr;
+ ConstantInt *Op1 = cast<ConstantInt>(II->getArgOperand(1));
+ ObjSizeMode Mode = Op1->isZero() ? ObjSizeMode::Max : ObjSizeMode::Min;
+ if (getObjectSize(II->getArgOperand(0),
+ Size, *DL, TLInfo, false, Mode)) {
+ RetVal = ConstantInt::get(ReturnTy, Size);
+ } else {
+ RetVal = ConstantInt::get(ReturnTy,
+ Mode == ObjSizeMode::Min ? 0 : -1ULL);
+ }
// Substituting this can cause recursive simplifications, which can
// invalidate our iterator. Use a WeakVH to hold onto it in case this
// happens.
- WeakVH IterHandle(&*CurInstIterator);
+ Value *CurValue = &*CurInstIterator;
+ WeakVH IterHandle(CurValue);
- replaceAndRecursivelySimplify(CI, RetVal,
- TLInfo, nullptr);
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
- if (IterHandle != CurInstIterator.getNodePtrUnchecked()) {
+ if (IterHandle != CurValue) {
CurInstIterator = BB->begin();
SunkAddrs.clear();
}
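
Where the old code folded every @llvm.objectsize call straight to 0 or -1 based on its min/max flag, the lowering now asks getObjectSize() for the actual allocation size first and only falls back to 0 (min mode) or -1 (max mode) when nothing is known about the pointer. A rough source-level illustration, assuming the usual Clang mapping of __builtin_object_size onto @llvm.objectsize (types 0/1 request the maximum, 2/3 the minimum):

    #include <cstdio>

    int main(int argc, char **argv) {
      (void)argc;
      char buf[32];
      // The size of buf is statically known; both modes evaluate to the real
      // value, 32, rather than a 0 / -1 placeholder.
      std::printf("%zu %zu\n", __builtin_object_size(buf, 0),   // max semantics
                               __builtin_object_size(buf, 2));  // min semantics
      // argv[0] points at an object of unknown size, so the fallback constants
      // apply: (size_t)-1 for max mode and 0 for min mode -- the same constants
      // the new code uses when getObjectSize() fails.
      std::printf("%zu %zu\n", __builtin_object_size(argv[0], 0),
                               __builtin_object_size(argv[0], 2));
      return 0;
    }
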
@@ -1788,7 +1845,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
case Intrinsic::masked_load: {
// Scalarize unsupported vector masked load
if (!TTI->isLegalMaskedLoad(CI->getType())) {
- ScalarizeMaskedLoad(CI);
+ scalarizeMaskedLoad(CI);
ModifiedDT = true;
return true;
}
@@ -1796,7 +1853,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
case Intrinsic::masked_store: {
if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
- ScalarizeMaskedStore(CI);
+ scalarizeMaskedStore(CI);
ModifiedDT = true;
return true;
}
@@ -1804,7 +1861,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
case Intrinsic::masked_gather: {
if (!TTI->isLegalMaskedGather(CI->getType())) {
- ScalarizeMaskedGather(CI);
+ scalarizeMaskedGather(CI);
ModifiedDT = true;
return true;
}
@@ -1812,7 +1869,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
case Intrinsic::masked_scatter: {
if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
- ScalarizeMaskedScatter(CI);
+ scalarizeMaskedScatter(CI);
ModifiedDT = true;
return true;
}
@@ -2076,7 +2133,7 @@ void ExtAddrMode::print(raw_ostream &OS) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ExtAddrMode::dump() const {
+LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
print(dbgs());
dbgs() << '\n';
}
@@ -3442,6 +3499,8 @@ static bool FindAllMemoryUses(
if (!MightBeFoldableInst(I))
return true;
+ const bool OptSize = I->getFunction()->optForSize();
+
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
Instruction *UserI = cast<Instruction>(U.getUser());
@@ -3459,6 +3518,11 @@ static bool FindAllMemoryUses(
}
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
+ // If this is a cold call, we can sink the addressing calculation into
+ // the cold path. See optimizeCallInst
+ if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ continue;
+
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
if (!IA) return true;
@@ -3550,10 +3614,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
if (!BaseReg && !ScaledReg)
return true;
- // If all uses of this instruction are ultimately load/store/inlineasm's,
- // check to see if their addressing modes will include this instruction. If
- // so, we can fold it into all uses, so it doesn't matter if it has multiple
- // uses.
+ // If all uses of this instruction can have the address mode sunk into them,
+ // we can remove the addressing mode and effectively trade one live register
+ // for another (at worst.) In this context, folding an addressing mode into
+ // the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
@@ -3561,8 +3625,13 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// Now that we know that all uses of this instruction are part of a chain of
// computation involving only operations that could theoretically be folded
- // into a memory use, loop over each of these uses and see if they could
- // *actually* fold the instruction.
+ // into a memory use, loop over each of these memory operation uses and see
+ // if they could *actually* fold the instruction. The assumption is that
+ // addressing modes are cheap and that duplicating the computation involved
+ // many times is worthwhile, even on a fastpath. For sinking candidates
+ // (i.e. cold call sites), this serves as a way to prevent excessive code
+ // growth since most architectures have some reasonable small and fast way to
+ // compute an effective address. (i.e LEA on x86)
SmallVector<Instruction*, 32> MatchedAddrModeInsts;
for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
Instruction *User = MemoryUses[i].first;
@@ -3616,6 +3685,11 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
return false;
}
+/// Sink addressing mode computation immediate before MemoryInst if doing so
+/// can be done without increasing register pressure. The need for the
+/// register pressure constraint means this can end up being an all or nothing
+/// decision for all uses of the same addressing computation.
+///
/// Load and Store Instructions often have addressing modes that can do
/// significant amounts of computation. As such, instruction selection will try
/// to get the load or store to do as much computation as possible for the
@@ -3623,7 +3697,13 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
/// such, we sink as much legal addressing mode work into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
-/// operands.
+/// operands. It's also used to sink addressing computations feeding into cold
+/// call sites into their (cold) basic block.
+///
+/// The motivation for handling sinking into cold blocks is that doing so can
+/// both enable other address mode sinking (by satisfying the register pressure
+/// constraint above), and reduce register pressure globally (by removing the
+/// addressing mode computation from the fast path entirely.).
bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Type *AccessTy, unsigned AddrSpace) {
Value *Repl = Addr;
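
The kind of code this targets is easiest to see at the source level. In the hypothetical example below, the address &items[idx] feeds both the fast-path loads and a call that is only reached on a cold error path; the loads can fold the address arithmetic into their own addressing modes, and the copy feeding the cold call can now be sunk into the cold block, so no separate address register has to stay live on the fast path:

    #include <cstdio>

    struct Item { int key; int value; };

    // Hypothetical cold error path; the cold attribute is what
    // optimizeCallInst() keys on via CI->hasFnAttr(Attribute::Cold).
    __attribute__((cold, noinline)) static void report_error(const Item *bad) {
      std::fprintf(stderr, "unexpected key %d\n", bad->key);
    }

    int lookup(Item *items, int idx, int expected_key) {
      if (__builtin_expect(items[idx].key != expected_key, 0)) {
        // Cold call: the address computation feeding it can now be sunk here.
        report_error(&items[idx]);
        return -1;
      }
      return items[idx].value;   // fast path: address folds into the loads
    }

    int main() {
      Item items[2] = {{1, 10}, {2, 20}};
      return lookup(items, 1, 2) == 20 ? 0 : 1;
    }
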
@@ -3662,7 +3742,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
continue;
}
- // For non-PHIs, determine the addressing mode being computed.
+ // For non-PHIs, determine the addressing mode being computed. Note that
+ // the result may differ depending on what other uses our candidate
+ // addressing instructions might have.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
@@ -3945,12 +4027,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (Repl->use_empty()) {
// This can cause recursive deletion, which can invalidate our iterator.
// Use a WeakVH to hold onto it in case this happens.
- WeakVH IterHandle(&*CurInstIterator);
+ Value *CurValue = &*CurInstIterator;
+ WeakVH IterHandle(CurValue);
BasicBlock *BB = CurInstIterator->getParent();
RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
- if (IterHandle != CurInstIterator.getNodePtrUnchecked()) {
+ if (IterHandle != CurValue) {
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
CurInstIterator = BB->begin();
@@ -4461,11 +4544,27 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
/// Returns true if a SelectInst should be turned into an explicit branch.
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
+ const TargetLowering *TLI,
SelectInst *SI) {
+ // If even a predictable select is cheap, then a branch can't be cheaper.
+ if (!TLI->isPredictableSelectExpensive())
+ return false;
+
// FIXME: This should use the same heuristics as IfConversion to determine
- // whether a select is better represented as a branch. This requires that
- // branch probability metadata is preserved for the select, which is not the
- // case currently.
+ // whether a select is better represented as a branch.
+
+ // If metadata tells us that the select condition is obviously predictable,
+ // then we want to replace the select with a branch.
+ uint64_t TrueWeight, FalseWeight;
+ if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t Max = std::max(TrueWeight, FalseWeight);
+ uint64_t Sum = TrueWeight + FalseWeight;
+ if (Sum != 0) {
+ auto Probability = BranchProbability::getBranchProbability(Max, Sum);
+ if (Probability > TLI->getPredictableBranchThreshold())
+ return true;
+ }
+ }
CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
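
The profile check above reduces to simple ratio arithmetic: take the heavier branch weight, divide by the sum, and compare against the target's predictability threshold. A minimal standalone sketch of that arithmetic follows; the 99/100 threshold is only an assumed example value, since real targets provide theirs through TargetLowering::getPredictableBranchThreshold():

    #include "llvm/Support/BranchProbability.h"
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    using namespace llvm;

    // True when the weights say one side of the select dominates strongly
    // enough that an explicit branch should be well predicted.
    static bool weightsLookPredictable(uint64_t TrueWeight, uint64_t FalseWeight,
                                       BranchProbability Threshold) {
      uint64_t Max = std::max(TrueWeight, FalseWeight);
      uint64_t Sum = TrueWeight + FalseWeight;
      if (Sum == 0)
        return false;
      return BranchProbability::getBranchProbability(Max, Sum) > Threshold;
    }

    int main() {
      BranchProbability Threshold(99, 100);  // assumed example threshold
      std::printf("%d\n", weightsLookPredictable(2000, 1, Threshold)); // 1
      std::printf("%d\n", weightsLookPredictable(60, 40, Threshold));  // 0
      return 0;
    }
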
@@ -4475,17 +4574,6 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
if (!Cmp || !Cmp->hasOneUse())
return false;
- Value *CmpOp0 = Cmp->getOperand(0);
- Value *CmpOp1 = Cmp->getOperand(1);
-
- // Emit "cmov on compare with a memory operand" as a branch to avoid stalls
- // on a load from memory. But if the load is used more than once, do not
- // change the select to a branch because the load is probably needed
- // regardless of whether the branch is taken or not.
- if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
- (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()))
- return true;
-
// If either operand of the select is expensive and only needed on one side
// of the select, we should form a branch.
if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
@@ -4502,7 +4590,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
// Can we convert the 'select' to CF ?
- if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
+ if (DisableSelectToBranch || OptSize || !TLI || VectorCond ||
+ SI->getMetadata(LLVMContext::MD_unpredictable))
return false;
TargetLowering::SelectSupportKind SelectKind;
@@ -4513,14 +4602,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
else
SelectKind = TargetLowering::ScalarValSelect;
- // Do we have efficient codegen support for this kind of 'selects' ?
- if (TLI->isSelectSupported(SelectKind)) {
- // We have efficient codegen support for the select instruction.
- // Check if it is profitable to keep this 'select'.
- if (!TLI->isPredictableSelectExpensive() ||
- !isFormingBranchFromSelectProfitable(TTI, SI))
- return false;
- }
+ if (TLI->isSelectSupported(SelectKind) &&
+ !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
+ return false;
ModifiedDT = true;
@@ -5145,7 +5229,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (!TLI || !TLI->hasMultipleConditionRegisters())
- return OptimizeCmpExpression(CI);
+ return OptimizeCmpExpression(CI, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
stripInvariantGroupMetadata(*LI);
@@ -5221,7 +5305,7 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL,
return false;
SmallVector<Instruction*, 4> Insts;
- if (!recognizeBitReverseOrBSwapIdiom(&I, false, true, Insts))
+ if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
return false;
Instruction *LastInst = Insts.back();
I.replaceAllUsesWith(LastInst);
@@ -5249,12 +5333,13 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
for (auto &I : reverse(BB)) {
if (makeBitReverse(I, *DL, *TLI)) {
MadeBitReverse = MadeChange = true;
+ ModifiedDT = true;
break;
}
}
}
MadeChange |= dupRetToEnableTailCallOpts(&BB);
-
+
return MadeChange;
}
@@ -5310,43 +5395,38 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
return false;
bool MadeChange = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *BB = &*I++;
-
+ for (BasicBlock &BB : F) {
// Does this BB end with the following?
// %andVal = and %val, #single-bit-set
// %icmpVal = icmp %andResult, 0
// br i1 %cmpVal label %dest1, label %dest2"
- BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
+ BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator());
if (!Brcc || !Brcc->isConditional())
continue;
ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
- if (!Cmp || Cmp->getParent() != BB)
+ if (!Cmp || Cmp->getParent() != &BB)
continue;
ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
if (!Zero || !Zero->isZero())
continue;
Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
- if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
+ if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB)
continue;
ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
continue;
- DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
+ DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump());
// Push the "and; icmp" for any users that are conditional branches.
// Since there can only be one branch use per BB, we don't need to keep
// track of which BBs we insert into.
- for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
- UI != E; ) {
- Use &TheUse = *UI;
+ for (Use &TheUse : Cmp->uses()) {
// Find brcc use.
- BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
- ++UI;
+ BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
if (!BrccUser || !BrccUser->isConditional())
continue;
BasicBlock *UserBB = BrccUser->getParent();
- if (UserBB == BB) continue;
+ if (UserBB == &BB) continue;
DEBUG(dbgs() << "found Brcc use\n");
// Sink the "and; icmp" to use.
@@ -5365,29 +5445,6 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
return MadeChange;
}
-/// \brief Retrieve the probabilities of a conditional branch. Returns true on
-/// success, or returns false if no or invalid metadata was found.
-static bool extractBranchMetadata(BranchInst *BI,
- uint64_t &ProbTrue, uint64_t &ProbFalse) {
- assert(BI->isConditional() &&
- "Looking for probabilities on unconditional branch?");
- auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
- if (!ProfileData || ProfileData->getNumOperands() != 3)
- return false;
-
- const auto *CITrue =
- mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
- const auto *CIFalse =
- mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
- if (!CITrue || !CIFalse)
- return false;
-
- ProbTrue = CITrue->getValue().getZExtValue();
- ProbFalse = CIFalse->getValue().getZExtValue();
-
- return true;
-}
-
/// \brief Scale down both weights to fit into uint32_t.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
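
The hand-rolled extractBranchMetadata() removed above is redundant now that Instruction::extractProfMetadata(), used by the callers in the following hunks, performs the same decoding of the two branch_weights operands and fails cleanly when the !prof metadata is missing or malformed. A minimal usage sketch:

    #include "llvm/IR/Instructions.h"
    #include <cassert>
    #include <cstdint>
    using namespace llvm;

    // Same contract as the deleted helper: returns false unless BI carries
    // well-formed branch_weights profile metadata.
    static bool branchWeights(BranchInst *BI, uint64_t &ProbTrue,
                              uint64_t &ProbFalse) {
      assert(BI->isConditional() && "no weights on an unconditional branch");
      return BI->extractProfMetadata(ProbTrue, ProbFalse);
    }
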
@@ -5456,11 +5513,9 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
// Create a new BB.
- auto *InsertBefore = std::next(Function::iterator(BB))
- .getNodePtrUnchecked();
- auto TmpBB = BasicBlock::Create(BB.getContext(),
- BB.getName() + ".cond.split",
- BB.getParent(), InsertBefore);
+ auto TmpBB =
+ BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
+ BB.getParent(), BB.getNextNode());
// Update original basic block by using the first condition directly by the
// branch instruction and removing the no longer needed and/or instruction.
@@ -5535,7 +5590,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
// Another choice is to assume TrueProb for BB1 equals to TrueProb for
// TmpBB, but the math is more complicated.
uint64_t TrueWeight, FalseWeight;
- if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+ if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
uint64_t NewTrueWeight = TrueWeight;
uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
scaleWeights(NewTrueWeight, NewFalseWeight);
@@ -5568,7 +5623,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
// assumes that
// FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
uint64_t TrueWeight, FalseWeight;
- if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+ if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
uint64_t NewFalseWeight = FalseWeight;
scaleWeights(NewTrueWeight, NewFalseWeight);
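
The rebalanced weights in these two hunks are plain integer arithmetic on the extracted profile counts (scaleWeights() then clamps them into uint32_t range), so a small worked example makes the effect concrete; the 3:1 input profile below is made up purely for illustration:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t TrueWeight = 3, FalseWeight = 1;  // made-up sample profile

      // First form above: the false edge absorbs the extra weight.
      uint64_t T1 = TrueWeight;                    // 3
      uint64_t F1 = TrueWeight + 2 * FalseWeight;  // 5

      // Second form above: the true edge absorbs the extra weight.
      uint64_t T2 = 2 * TrueWeight + FalseWeight;  // 7
      uint64_t F2 = FalseWeight;                   // 1

      std::printf("%llu:%llu and %llu:%llu\n",
                  (unsigned long long)T1, (unsigned long long)F1,
                  (unsigned long long)T2, (unsigned long long)F2);
      return 0;
    }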