Diffstat (limited to 'contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 821
1 file changed, 492 insertions(+), 329 deletions(-)
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 82cd380..6a81403 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -32,13 +33,13 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -72,6 +73,10 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
+static cl::opt<bool>
+ DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable GC optimizations in CodeGenPrepare"));
+
static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
@@ -119,7 +124,6 @@ class TypePromotionTransaction;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
- DominatorTree *DT;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
@@ -137,8 +141,7 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
- /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
- /// be updated.
+ /// ModifiedDT - True if the CFG has been modified in any way.
bool ModifiedDT;
/// OptSize - True if optimizing for size.
@@ -156,8 +159,8 @@ class TypePromotionTransaction;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
@@ -167,7 +170,8 @@ class TypePromotionTransaction;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
bool OptimizeInst(Instruction *I, bool& ModifiedDT);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr,
+ Type *AccessTy, unsigned AS);
bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
bool MoveExtToFormExtLoad(Instruction *&I);
@@ -181,8 +185,9 @@ class TypePromotionTransaction;
bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInst);
+ unsigned CreatedInstCost);
bool splitBranchCondition(Function &F);
+ bool simplifyOffsetableRelocate(Instruction &I);
};
}
@@ -205,14 +210,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
ModifiedDT = false;
if (TM)
- TLI = TM->getSubtargetImpl()->getTargetLowering();
- TLInfo = &getAnalysis<TargetLibraryInfo>();
- TTI = &getAnalysis<TargetTransformInfo>();
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
- OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
@@ -248,9 +249,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BasicBlock *BB = I++;
bool ModifiedDTOnIteration = false;
MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
-
+
// Restart BB iteration if the dominator tree of the Function was changed
- ModifiedDT |= ModifiedDTOnIteration;
if (ModifiedDTOnIteration)
break;
}
@@ -293,13 +293,18 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (EverMadeChange || MadeChange)
MadeChange |= EliminateFallThrough(F);
- if (MadeChange)
- ModifiedDT = true;
EverMadeChange |= MadeChange;
}
- if (ModifiedDT && DT)
- DT->recalculate(F);
+ if (!DisableGCOpts) {
+ SmallVector<Instruction *, 2> Statepoints;
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ if (isStatepoint(I))
+ Statepoints.push_back(&I);
+ for (auto &I : Statepoints)
+ EverMadeChange |= simplifyOffsetableRelocate(*I);
+ }
return EverMadeChange;
}
@@ -326,7 +331,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
// Remember if SinglePred was the entry block of the function.
// If so, we will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(BB, this);
+ MergeBasicBlockIntoOnlyPred(BB, nullptr);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -466,7 +471,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, this);
+ MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -508,19 +513,188 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
- if (DT && !ModifiedDT) {
- BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
- BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
- DT->changeImmediateDominator(DestBB, NewIDom);
- DT->eraseNode(BB);
- }
BB->eraseFromParent();
++NumBlocksElim;
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
+// Computes a map of base pointer relocation instructions to corresponding
+// derived pointer relocation instructions given a vector of all relocate calls
+static void computeBaseDerivedRelocateMap(
+ const SmallVectorImpl<User *> &AllRelocateCalls,
+ DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
+ RelocateInstMap) {
+ // Collect information in two maps: one primarily for locating the base object
+ // while filling the second map; the second map is the final structure holding
+ // a mapping between Base and corresponding Derived relocate calls
+ DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
+ for (auto &U : AllRelocateCalls) {
+ GCRelocateOperands ThisRelocate(U);
+ IntrinsicInst *I = cast<IntrinsicInst>(U);
+ auto K = std::make_pair(ThisRelocate.getBasePtrIndex(),
+ ThisRelocate.getDerivedPtrIndex());
+ RelocateIdxMap.insert(std::make_pair(K, I));
+ }
+ for (auto &Item : RelocateIdxMap) {
+ std::pair<unsigned, unsigned> Key = Item.first;
+ if (Key.first == Key.second)
+ // Base relocation: nothing to insert
+ continue;
+
+ IntrinsicInst *I = Item.second;
+ auto BaseKey = std::make_pair(Key.first, Key.first);
+
+ // We're iterating over RelocateIdxMap so we cannot modify it.
+ auto MaybeBase = RelocateIdxMap.find(BaseKey);
+ if (MaybeBase == RelocateIdxMap.end())
+ // TODO: We might want to insert a new base object relocate and gep off
+ // that, if there are enough derived object relocates.
+ continue;
+
+ RelocateInstMap[MaybeBase->second].push_back(I);
+ }
+}
+
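(To make the two-map dance above concrete, here is a sketch with made-up
indices into a statepoint's gc-argument section:

    ; relocates off one statepoint, keyed (BasePtrIndex, DerivedPtrIndex):
    ;   (4,4)        -> base relocate
    ;   (4,5), (4,6) -> derived relocates off that base
    ; resulting RelocateInstMap:
    ;   relocate(4,4) -> [ relocate(4,5), relocate(4,6) ]
)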
+// Accepts a GEP and extracts the operands into a vector provided they're all
+// small integer constants
+static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
+ SmallVectorImpl<Value *> &OffsetV) {
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
+ // Only accept small constant integer operands
+ auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!Op || Op->getZExtValue() > 20)
+ return false;
+ }
+
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++)
+ OffsetV.push_back(GEP->getOperand(i));
+ return true;
+}
+
+// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
+// replace, computes a replacement, and applies it.
+static bool
+simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
+ const SmallVectorImpl<IntrinsicInst *> &Targets) {
+ bool MadeChange = false;
+ for (auto &ToReplace : Targets) {
+ GCRelocateOperands MasterRelocate(RelocatedBase);
+ GCRelocateOperands ThisRelocate(ToReplace);
+
+ assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() &&
+ "Not relocating a derived object of the original base object");
+ if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) {
+ // A duplicate relocate call. TODO: coalesce duplicates.
+ continue;
+ }
+
+ Value *Base = ThisRelocate.getBasePtr();
+ auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
+ if (!Derived || Derived->getPointerOperand() != Base)
+ continue;
+
+ SmallVector<Value *, 2> OffsetV;
+ if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
+ continue;
+
+ // Create a Builder and replace the target callsite with a gep
+ assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator");
+
+ // Insert after RelocatedBase
+ IRBuilder<> Builder(RelocatedBase->getNextNode());
+ Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+
+ // If gc_relocate does not match the actual type, cast it to the right type.
+ // In theory, there must be a bitcast after gc_relocate if the type does not
+ // match, and we should reuse it to get the derived pointer. But it could be
+ // cases like this:
+ // bb1:
+ // ...
+ // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
+ // br label %merge
+ //
+ // bb2:
+ // ...
+ // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
+ // br label %merge
+ //
+ // merge:
+ // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
+ // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
+ //
+ // In this case, we cannot find the bitcast any more. So we insert a new
+ // bitcast no matter whether there is already one or not. This way we handle
+ // all cases, and the extra bitcast should be optimized away in later passes.
+ Instruction *ActualRelocatedBase = RelocatedBase;
+ if (RelocatedBase->getType() != Base->getType()) {
+ ActualRelocatedBase =
+ cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType()));
+ }
+ Value *Replacement = Builder.CreateGEP(
+ Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
+ Instruction *ReplacementInst = cast<Instruction>(Replacement);
+ Replacement->takeName(ToReplace);
+ // If the newly generated derived pointer's type does not match the original derived
+ // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
+ Instruction *ActualReplacement = ReplacementInst;
+ if (ReplacementInst->getType() != ToReplace->getType()) {
+ ActualReplacement =
+ cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType()));
+ }
+ ToReplace->replaceAllUsesWith(ActualReplacement);
+ ToReplace->eraseFromParent();
+
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+// Turns this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = relocate(%tok, i32 4, i32 4)
+// %ptr' = relocate(%tok, i32 4, i32 5)
+// %val = load %ptr'
+//
+// into this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = gc.relocate(%tok, i32 4, i32 4)
+// %ptr' = gep %base' + 15
+// %val = load %ptr'
+bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
+ bool MadeChange = false;
+ SmallVector<User *, 2> AllRelocateCalls;
+
+ for (auto *U : I.users())
+ if (isGCRelocate(dyn_cast<Instruction>(U)))
+ // Collect all the relocate calls associated with a statepoint
+ AllRelocateCalls.push_back(U);
+
+ // We need at least one base pointer relocation + one derived pointer
+ // relocation to mangle
+ if (AllRelocateCalls.size() < 2)
+ return false;
+
+ // RelocateInstMap is a mapping from the base relocate instruction to the
+ // corresponding derived relocate instructions
+ DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
+ computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
+ if (RelocateInstMap.empty())
+ return false;
+
+ for (auto &Item : RelocateInstMap)
+ // Item.first is the RelocatedBase to offset against
+ // Item.second is the vector of Targets to replace
+ MadeChange |= simplifyRelocatesOffABase(Item.first, Item.second);
+ return MadeChange;
+}
+
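(As a concrete illustration of the rewrite above — a sketch only; the relocate
signature shown is the early i32-token form of this era, and the names and
index values are illustrative:

    %tok      = ...   ; statepoint token; %base is gc arg 4, %ptr is gc arg 5
    %base.rel = call i8 addrspace(1)*
                @llvm.experimental.gc.relocate.p1i8(i32 %tok, i32 4, i32 4)
    %ptr.rel  = call i8 addrspace(1)*
                @llvm.experimental.gc.relocate.p1i8(i32 %tok, i32 4, i32 5)
    ; after simplifyOffsetableRelocate, uses of %ptr.rel go through a GEP:
    %ptr.rel2 = getelementptr i8, i8 addrspace(1)* %base.rel, i32 15
)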
/// SinkCast - Sink the specified cast instruction into its user blocks
static bool SinkCast(CastInst *CI) {
BasicBlock *DefBB = CI->getParent();
@@ -555,11 +729,11 @@ static bool SinkCast(CastInst *CI) {
InsertedCast =
CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
- MadeChange = true;
}
// Replace a use of the cast with a use of the new cast.
TheUse = InsertedCast;
+ MadeChange = true;
++NumCastUses;
}
@@ -609,13 +783,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
return SinkCast(CI);
}
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// CombineUAddWithOverflow - try to combine CI into a call to the
+/// llvm.uadd.with.overflow intrinsic if possible.
+///
+/// Return true if any changes were made.
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+ Value *A, *B;
+ Instruction *AddI;
+ if (!match(CI,
+ m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+ return false;
+
+ Type *Ty = AddI->getType();
+ if (!isa<IntegerType>(Ty))
+ return false;
+
+ // We don't want to move around uses of condition values this late, so we
+ // check if it is legal to create the call to the intrinsic in the basic
+ // block containing the icmp:
+
+ if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+ return false;
+
+#ifndef NDEBUG
+ // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+ // for now:
+ if (AddI->hasOneUse())
+ assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+ Module *M = CI->getParent()->getParent()->getParent();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+ auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+ auto *UAddWithOverflow =
+ CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+ auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+ auto *Overflow =
+ ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+ CI->replaceAllUsesWith(Overflow);
+ AddI->replaceAllUsesWith(UAdd);
+ CI->eraseFromParent();
+ AddI->eraseFromParent();
+ return true;
+}
+
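(For context, the idiom m_UAddWithOverflow matches and the form it is rewritten
into look roughly like this — a sketch with illustrative value names:

    ; before: plain add plus the usual unsigned-overflow compare
    %add = add i32 %a, %b
    %cmp = icmp ult i32 %add, %a
    ; after CombineUAddWithOverflow:
    %uadd.overflow = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
    %uadd          = extractvalue { i32, i1 } %uadd.overflow, 0
    %overflow      = extractvalue { i32, i1 } %uadd.overflow, 1
)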
+/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
/// InsertedCmp - Only insert a cmp in each block once.
@@ -649,21 +870,33 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
CmpInst::Create(CI->getOpcode(),
CI->getPredicate(), CI->getOperand(0),
CI->getOperand(1), "", InsertPt);
- MadeChange = true;
}
// Replace a use of the cmp with a use of the new cmp.
TheUse = InsertedCmp;
+ MadeChange = true;
++NumCmpUses;
}
// If we removed all uses, nuke the cmp.
- if (CI->use_empty())
+ if (CI->use_empty()) {
CI->eraseFromParent();
+ MadeChange = true;
+ }
return MadeChange;
}
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ if (SinkCmpExpression(CI))
+ return true;
+
+ if (CombineUAddWithOverflow(CI))
+ return true;
+
+ return false;
+}
+
/// isExtractBitsCandidateUse - Check if the candidates could
/// be combined with shift instruction, which includes:
/// 1. Truncate instruction
@@ -943,8 +1176,9 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
//
CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Builder.SetInsertPoint(InsertPt);
-
- Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
LoadInst* Load = Builder.CreateLoad(Gep, false);
VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
@@ -1038,7 +1272,8 @@ static void ScalarizeMaskedStore(CallInst *CI) {
Builder.SetInsertPoint(InsertPt);
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
Builder.CreateStore(OneElt, Gep);
// Create "else" block, fill it in the next iteration
@@ -1072,6 +1307,54 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return true;
}
+ const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
+
+ // Align the pointer arguments to this call if the target thinks it's a good
+ // idea
+ unsigned MinSize, PrefAlign;
+ if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ for (auto &Arg : CI->arg_operands()) {
+ // We want to align both objects whose address is used directly and
+ // objects whose address is used in casts and GEPs, though it only makes
+ // sense for GEPs if the offset is a multiple of the desired alignment and
+ // if size - offset meets the size threshold.
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ APInt Offset(TD->getPointerSizeInBits(
+ cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
+ Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
+ uint64_t Offset2 = Offset.getLimitedValue();
+ if ((Offset2 & (PrefAlign-1)) != 0)
+ continue;
+ AllocaInst *AI;
+ if ((AI = dyn_cast<AllocaInst>(Val)) &&
+ AI->getAlignment() < PrefAlign &&
+ TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
+ AI->setAlignment(PrefAlign);
+ // Global variables can only be aligned if they are defined in this
+ // object (i.e. they are uniquely initialized in this object), and
+ // over-aligning global variables that have an explicit section is
+ // forbidden.
+ GlobalVariable *GV;
+ if ((GV = dyn_cast<GlobalVariable>(Val)) &&
+ GV->hasUniqueInitializer() &&
+ !GV->hasSection() &&
+ GV->getAlignment() < PrefAlign &&
+ TD->getTypeAllocSize(
+ GV->getType()->getElementType()) >= MinSize + Offset2)
+ GV->setAlignment(PrefAlign);
+ }
+ // If this is a memcpy (or similar) then we may be able to improve the
+ // alignment
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+ unsigned Align = getKnownAlignment(MI->getDest(), *TD);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
+ if (Align > MI->getAlignment())
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
+ }
+ }
+
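(A sketch of what the alignment block above acts on — illustrative only; the
memcpy intrinsic is shown with the explicit i32 alignment operand it had at
this time:

    %buf = alloca [64 x i8], align 4   ; may be raised to the target's PrefAlign
    ...
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64,
                                         i32 1, i1 false)
    ; with the alloca re-aligned, getKnownAlignment on dest/source can justify
    ; raising the memcpy's alignment operand as well
)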
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
@@ -1088,8 +1371,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
WeakVH IterHandle(CurInstIterator);
replaceAndRecursivelySimplify(CI, RetVal,
- TLI ? TLI->getDataLayout() : nullptr,
- TLInfo, ModifiedDT ? nullptr : DT);
+ TLInfo, nullptr);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
@@ -1116,14 +1398,28 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
return false;
}
+ case Intrinsic::aarch64_stlxr:
+ case Intrinsic::aarch64_stxr: {
+ ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
+ if (!ExtVal || !ExtVal->hasOneUse() ||
+ ExtVal->getParent() == CI->getParent())
+ return false;
+ // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
+ ExtVal->moveBefore(CI);
+ return true;
+ }
}
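(Schematically, the stlxr/stxr case sinks a cross-block zext so instruction
selection can fold it into the store-exclusive — a sketch; the intrinsic name
is overloaded on the pointer type:

    ; bb1:
    %ext = zext i32 %v to i64
    br label %bb2
    ; bb2:
    %st = call i32 @llvm.aarch64.stxr.p0i64(i64 %ext, i64* %addr)
    ; the pass moves %ext into bb2, immediately before the call
)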
if (TLI) {
+ // Unknown address space.
+ // TODO: Target hook to pick which address space the intrinsic cares
+ // about?
+ unsigned AddrSpace = ~0u;
SmallVector<Value*, 2> PtrOps;
Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
while (!PtrOps.empty())
- if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+ if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
return true;
}
}
@@ -1131,15 +1427,11 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
// From here on out we're working with named functions.
if (!CI->getCalledFunction()) return false;
- // We'll need DataLayout from here on out.
- const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
- if (!TD) return false;
-
// Lower all default uses of _chk calls. This is very similar
// to what InstCombineCalls does, but here we are only lowering calls
// to fortified library functions (e.g. __memcpy_chk) that have the default
// "don't know" as the objectsize. Anything else should be left alone.
- FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true);
+ FortifiedLibCallSimplifier Simplifier(TLInfo, true);
if (Value *V = Simplifier.optimizeCall(CI)) {
CI->replaceAllUsesWith(V);
CI->eraseFromParent();
@@ -1672,7 +1964,7 @@ class TypePromotionTransaction {
Inst->removeFromParent();
}
- ~InstructionRemover() { delete Replacer; }
+ ~InstructionRemover() override { delete Replacer; }
/// \brief Really remove the instruction.
void commit() override { delete Inst; }
@@ -1802,11 +2094,13 @@ void TypePromotionTransaction::rollback(
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
SmallVectorImpl<Instruction*> &AddrModeInsts;
+ const TargetMachine &TM;
const TargetLowering &TLI;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
/// the memory instruction that we're computing this address for.
Type *AccessTy;
+ unsigned AddrSpace;
Instruction *MemoryInst;
/// AddrMode - This is the addressing mode that we're building up. This is
@@ -1825,13 +2119,16 @@ class AddressingModeMatcher {
/// always returns true.
bool IgnoreProfitability;
- AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
- const TargetLowering &T, Type *AT,
+ AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
+ const TargetMachine &TM, Type *AT, unsigned AS,
Instruction *MI, ExtAddrMode &AM,
const SetOfInstrs &InsertedTruncs,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
- : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM),
+ : AddrModeInsts(AMI), TM(TM),
+ TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
+ ->getTargetLowering()),
+ AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
IgnoreProfitability = false;
}
@@ -1845,16 +2142,16 @@ public:
/// optimizations.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p The ongoing transaction where every action should be registered.
- static ExtAddrMode Match(Value *V, Type *AccessTy,
+ static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetLowering &TLI,
+ const TargetMachine &TM,
const SetOfInstrs &InsertedTruncs,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
MemoryInst, Result, InsertedTruncs,
PromotedInsts, TPT).MatchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -1869,7 +2166,7 @@ private:
ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter);
bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
- bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion,
+ bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
Value *PromotedOperand) const;
};
@@ -1900,7 +2197,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
TestAddrMode.ScaledReg = ScaleReg;
// If the new address isn't legal, bail out.
- if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+ if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace))
return false;
// It was legal, so commit it.
@@ -1917,7 +2214,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
// If this addressing mode is legal, commit it and remember that we folded
// this instruction.
- if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+ if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) {
AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
AddrMode = TestAddrMode;
return true;
@@ -2003,7 +2300,7 @@ class TypePromotionHelper {
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is a promotable trunc or sext or zext.
/// \p PromotedInsts maps the instructions to their type before promotion.
- /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// \p CreatedInstsCost[out] contains the cost of all instructions
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
@@ -2011,53 +2308,55 @@ class TypePromotionHelper {
/// \return The promoted value which is used instead of Ext.
static Value *promoteOperandForTruncAndAnyExt(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs);
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
- /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// \p CreatedInstsCost[out] contains the cost of all the instructions
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *
- promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
+ static Value *promoteOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI, bool IsSExt);
/// \see promoteOperandForOther.
- static Value *
- signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
- Truncs, true);
+ static Value *signExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, true);
}
/// \see promoteOperandForOther.
- static Value *
- zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
- Truncs, false);
+ static Value *zeroExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, false);
}
public:
/// Type for the utility function that promotes the operand of Ext.
typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs);
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI);
/// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
@@ -2174,16 +2473,18 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
Value *ExtVal = SExt;
+ bool HasMergedNonFreeExt = false;
if (isa<ZExtInst>(SExtOpnd)) {
// Replace s|zext(zext(opnd))
// => zext(opnd).
+ HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
Value *ZExt =
TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
TPT.replaceAllUsesWith(SExt, ZExt);
@@ -2194,7 +2495,7 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// => z|sext(opnd).
TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
}
- CreatedInsts = 0;
+ CreatedInstsCost = 0;
// Remove dead code.
if (SExtOpnd->use_empty())
@@ -2203,8 +2504,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// Check if the extension is still needed.
Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
- if (ExtInst && Exts)
- Exts->push_back(ExtInst);
+ if (ExtInst) {
+ if (Exts)
+ Exts->push_back(ExtInst);
+ CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
+ }
return ExtVal;
}
@@ -2217,13 +2521,14 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
Value *TypePromotionHelper::promoteOperandForOther(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
+ bool IsSExt) {
// By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
- CreatedInsts = 0;
+ CreatedInstsCost = 0;
if (!ExtOpnd->hasOneUse()) {
// ExtOpnd will be promoted.
// All its uses, but Ext, will need to use a truncated value of the
@@ -2298,7 +2603,6 @@ Value *TypePromotionHelper::promoteOperandForOther(
continue;
}
ExtForOpnd = cast<Instruction>(ValForExtOpnd);
- ++CreatedInsts;
}
if (Exts)
Exts->push_back(ExtForOpnd);
@@ -2307,6 +2611,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
// Move the sign extension before the insertion point.
TPT.moveBefore(ExtForOpnd, ExtOpnd);
TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
+ CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
// If more sext are required, new instructions will have to be created.
ExtForOpnd = nullptr;
}
@@ -2319,22 +2624,22 @@ Value *TypePromotionHelper::promoteOperandForOther(
/// IsPromotionProfitable - Check whether or not promoting an instruction
/// to a wider type was profitable.
-/// \p MatchedSize gives the number of instructions that have been matched
-/// in the addressing mode after the promotion was applied.
-/// \p SizeWithPromotion gives the number of created instructions for
-/// the promotion plus the number of instructions that have been
-/// matched in the addressing mode before the promotion.
+/// \p NewCost gives the cost of extension instructions created by the
+/// promotion.
+/// \p OldCost gives the cost of extension instructions before the promotion
+/// plus the number of instructions that have been
+/// matched in the addressing mode before the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
-bool
-AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
- unsigned SizeWithPromotion,
- Value *PromotedOperand) const {
- // We folded less instructions than what we created to promote the operand.
+bool AddressingModeMatcher::IsPromotionProfitable(
+ unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
+ DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
+ // The cost of the new extensions is greater than the cost of the
+ // old extension plus what we folded.
// This is not profitable.
- if (MatchedSize < SizeWithPromotion)
+ if (NewCost > OldCost)
return false;
- if (MatchedSize > SizeWithPromotion)
+ if (NewCost < OldCost)
return true;
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
@@ -2374,7 +2679,6 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
// BitCast is always a noop, and we can handle it as long as it is
// int->int or pointer->pointer (we don't want int<->fp or something).
if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
@@ -2385,6 +2689,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
AddrInst->getOperand(0)->getType() != AddrInst->getType())
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
+ case Instruction::AddrSpaceCast: {
+ unsigned SrcAS
+ = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
+ unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
+ if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ }
case Instruction::Add: {
// Check to see if we can merge in the RHS then the LHS. If so, we win.
ExtAddrMode BackupAddrMode = AddrMode;
@@ -2466,7 +2778,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
// just add it to the disp field and check validity.
if (VariableOperand == -1) {
AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+ if (ConstantOffset == 0 ||
+ TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) {
// Check to see if we can fold the base pointer in too.
if (MatchAddr(AddrInst->getOperand(0), Depth+1))
return true;
@@ -2532,9 +2845,10 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- unsigned CreatedInsts = 0;
+ unsigned CreatedInstsCost = 0;
+ unsigned ExtCost = !TLI.isExtFree(Ext);
Value *PromotedOperand =
- TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
+ TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
@@ -2556,7 +2870,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned OldSize = AddrModeInsts.size();
if (!MatchAddr(PromotedOperand, Depth) ||
- !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts,
+ // The total of the new cost equals the cost of the created
+ // instructions.
+ // The total of the old cost equals the cost of the extension plus
+ // what we have saved in the addressing mode.
+ !IsPromotionProfitable(CreatedInstsCost,
+ ExtCost + (AddrModeInsts.size() - OldSize),
PromotedOperand)) {
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
@@ -2583,14 +2902,14 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
// Fold in immediates if legal for the target.
AddrMode.BaseOffs += CI->getSExtValue();
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.BaseOffs -= CI->getSExtValue();
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
// If this is a global variable, try to fold it into the addressing mode.
if (!AddrMode.BaseGV) {
AddrMode.BaseGV = GV;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.BaseGV = nullptr;
}
@@ -2634,7 +2953,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
AddrMode.HasBaseReg = true;
AddrMode.BaseReg = Addr;
// Still check for legality in case the target supports [imm] but not [i+r].
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.HasBaseReg = false;
AddrMode.BaseReg = nullptr;
@@ -2644,7 +2963,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
if (AddrMode.Scale == 0) {
AddrMode.Scale = 1;
AddrMode.ScaledReg = Addr;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.Scale = 0;
AddrMode.ScaledReg = nullptr;
@@ -2658,13 +2977,17 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
/// inline asm call are due to memory operands. If so, return true, otherwise
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetLowering &TLI) {
- TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ const TargetMachine &TM) {
+ const Function *F = CI->getParent()->getParent();
+ const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(TRI, ImmutableCallSite(CI));
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue());
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
// operand, we can't fold it!
@@ -2680,10 +3003,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use. If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
-static bool FindAllMemoryUses(Instruction *I,
- SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
- SmallPtrSetImpl<Instruction*> &ConsideredInsts,
- const TargetLowering &TLI) {
+static bool FindAllMemoryUses(
+ Instruction *I,
+ SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -2713,12 +3036,12 @@ static bool FindAllMemoryUses(Instruction *I,
if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ if (!IsOperandAMemoryOperand(CI, IA, I, TM))
return true;
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
return true;
}
@@ -2806,7 +3129,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// uses.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -2821,9 +3144,11 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// Get the access type of this use. If the use isn't a pointer, we don't
// know what it accesses.
Value *Address = User->getOperand(OpNo);
- if (!Address->getType()->isPointerTy())
+ PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
+ if (!AddrTy)
return false;
- Type *AddressAccessTy = Address->getType()->getPointerElementType();
+ Type *AddressAccessTy = AddrTy->getElementType();
+ unsigned AS = AddrTy->getAddressSpace();
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
@@ -2831,7 +3156,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode Result;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
MemoryInst, Result, InsertedTruncs,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
@@ -2874,7 +3199,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- Type *AccessTy) {
+ Type *AccessTy, unsigned AddrSpace) {
Value *Repl = Addr;
// Try to collapse single-value PHI nodes. This is necessary to undo
@@ -2906,16 +3231,16 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
- for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
- worklist.push_back(P->getIncomingValue(i));
+ for (Value *IncValue : P->incoming_values())
+ worklist.push_back(IncValue);
continue;
}
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
- PromotedInsts, TPT);
+ V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
+ InsertedTruncsSet, PromotedInsts, TPT);
// This check is broken into two cases with very similar code to avoid using
// getNumUses() as much as possible. Some values have a lot of uses, so
@@ -2989,8 +3314,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
<< *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
- } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
- TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) {
+ } else if (AddrSinkUsingGEPs ||
+ (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
+ TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
+ ->useAA())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -3041,7 +3368,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return false;
} else {
Type *I8PtrTy =
- Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Type *I8Ty = Builder.getInt8Ty();
// Start with the base register. Do this first so that subsequent address
// matching finds it last, which will prevent it from trying to match it
@@ -3093,7 +3421,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
- ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
ResultIndex = V;
@@ -3104,7 +3432,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
- SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
@@ -3213,8 +3541,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
+ const TargetRegisterInfo *TRI =
+ TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI->ParseConstraints(CS);
+ TargetConstraints = TLI->ParseConstraints(TRI, CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -3225,7 +3555,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = CS->getArgOperand(ArgNo++);
- MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+ MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
} else if (OpInfo.Type == InlineAsm::isInput)
ArgNo++;
}
@@ -3308,7 +3638,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
LoadInst *&LI, Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInsts = 0) {
+ unsigned CreatedInstsCost = 0) {
// Iterate over all the extensions to see if one forms an ext(load).
for (auto I : Exts) {
// Check if we directly have ext(load).
@@ -3330,10 +3660,11 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
SmallVector<Instruction *, 4> NewExts;
- unsigned NewCreatedInsts = 0;
+ unsigned NewCreatedInstsCost = 0;
+ unsigned ExtCost = !TLI->isExtFree(I);
// Promote.
- Value *PromotedVal =
- TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
+ Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
+ &NewExts, nullptr, *TLI);
assert(PromotedVal &&
"TypePromotionHelper should have filtered out those cases");
@@ -3343,9 +3674,10 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
// With exactly 2, the transformation is neutral, because we will merge
// one extension but leave one. However, we optimistically keep going,
// because the new extension may be removed too.
- unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
+ long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
+ TotalCreatedInstsCost -= ExtCost;
if (!StressExtLdPromotion &&
- (TotalCreatedInsts > 1 ||
+ (TotalCreatedInstsCost > 1 ||
!isPromotedInstructionLegal(*TLI, PromotedVal))) {
// The promotion is not profitable, rollback to the previous state.
TPT.rollback(LastKnownGood);
@@ -3353,8 +3685,8 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
}
// The promotion is profitable.
// Check if it exposes an ext(load).
- (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
- if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
+ (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
+ if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
// If we have created a new extension, i.e., now we have two
// extensions. We must make sure one of them is merged with
// the load, otherwise we may degrade the code quality.
@@ -3969,148 +4301,6 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
}
-// See if we can speculate calls to intrinsic cttz/ctlz.
-//
-// Example:
-// entry:
-// ...
-// %cmp = icmp eq i64 %val, 0
-// br i1 %cmp, label %end.bb, label %then.bb
-//
-// then.bb:
-// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 true)
-// br label %EndBB
-//
-// end.bb:
-// %cond = phi i64 [ %c, %then.bb ], [ 64, %entry ]
-//
-// ==>
-//
-// entry:
-// ...
-// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 false)
-//
-static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
- assert(BrInst->isConditional() && "Expected a conditional branch!");
- BasicBlock *ThenBB = BrInst->getSuccessor(1);
- BasicBlock *EndBB = BrInst->getSuccessor(0);
-
- // See if ThenBB contains only one instruction (excluding the
- // terminator and DbgInfoIntrinsic calls).
- IntrinsicInst *II = nullptr;
- CastInst *CI = nullptr;
- for (BasicBlock::iterator I = ThenBB->begin(),
- E = std::prev(ThenBB->end()); I != E; ++I) {
- // Skip debug info.
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- // Check if this is a zero extension or a truncate of a previously
- // matched call to intrinsic cttz/ctlz.
- if (II) {
- // Early exit if we already found a "free" zero extend/truncate.
- if (CI)
- return false;
-
- Type *SrcTy = II->getType();
- Type *DestTy = I->getType();
- Value *V;
-
- if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) {
- // Speculate this zero extend only if it is "free" for the target.
- if (TLI.isZExtFree(SrcTy, DestTy)) {
- CI = cast<CastInst>(I);
- continue;
- }
- } else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) {
- // Speculate this truncate only if it is "free" for the target.
- if (TLI.isTruncateFree(SrcTy, DestTy)) {
- CI = cast<CastInst>(I);
- continue;
- }
- } else {
- // Avoid speculating more than one instruction.
- return false;
- }
- }
-
- // See if this is a call to intrinsic cttz/ctlz.
- if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) {
- // Avoid speculating expensive intrinsic calls.
- if (!TLI.isCheapToSpeculateCttz())
- return false;
- }
- else if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::ctlz>())) {
- // Avoid speculating expensive intrinsic calls.
- if (!TLI.isCheapToSpeculateCtlz())
- return false;
- } else
- return false;
-
- II = cast<IntrinsicInst>(I);
- }
-
- // Look for PHI nodes with 'II' as the incoming value from 'ThenBB'.
- BasicBlock *EntryBB = BrInst->getParent();
- for (BasicBlock::iterator I = EndBB->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
- Value *OrigV = PN->getIncomingValueForBlock(EntryBB);
-
- if (!OrigV)
- return false;
-
- if (ThenV != II && (!CI || ThenV != CI))
- return false;
-
- if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) {
- unsigned BitWidth = II->getType()->getIntegerBitWidth();
-
- // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz
- // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits
- // of the value in input to the cttz/ctlz.
- if (CInt->getValue() != BitWidth)
- return false;
-
- // Hoist the call to cttz/ctlz from ThenBB into EntryBB.
- EntryBB->getInstList().splice(BrInst, ThenBB->getInstList(),
- ThenBB->begin(), std::prev(ThenBB->end()));
-
- // Update PN setting ThenV as the incoming value from both 'EntryBB'
- // and 'ThenBB'. Eventually, method 'OptimizeInst' will fold this
- // phi node if all the incoming values are the same.
- PN->setIncomingValue(PN->getBasicBlockIndex(EntryBB), ThenV);
- PN->setIncomingValue(PN->getBasicBlockIndex(ThenBB), ThenV);
-
- // Clear the 'undef on zero' flag of the cttz/ctlz intrinsic call.
- if (cast<ConstantInt>(II->getArgOperand(1))->isOne()) {
- Type *Ty = II->getArgOperand(0)->getType();
- Value *Args[] = { II->getArgOperand(0),
- ConstantInt::getFalse(II->getContext()) };
- Module *M = EntryBB->getParent()->getParent();
- Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty);
- IRBuilder<> Builder(II);
- Instruction *NewI = Builder.CreateCall(IF, Args);
-
- // Replace the old call to cttz/ctlz.
- II->replaceAllUsesWith(NewI);
- II->eraseFromParent();
- }
-
- // Update BrInst condition so that the branch to EndBB is always taken.
- // Later on, method 'ConstantFoldTerminator' will simplify this branch
- // replacing it with a direct branch to 'EndBB'.
- // As a side effect, CodeGenPrepare will attempt to simplify the control
- // flow graph by deleting basic block 'ThenBB' and merging 'EntryBB' into
- // 'EndBB' (calling method 'EliminateFallThrough').
- BrInst->setCondition(ConstantInt::getTrue(BrInst->getContext()));
- return true;
- }
- }
-
- return false;
-}
-
/// Some targets can do store(extractelement) with one instruction.
/// Try to push the extractelement towards the stores when the target
/// has this feature and this is profitable.
@@ -4171,8 +4361,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
- TLInfo, DT)) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
@@ -4214,15 +4404,19 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
return OptimizeCmpExpression(CI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (TLI)
- return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+ if (TLI) {
+ unsigned AS = LI->getPointerAddressSpace();
+ return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
+ }
return false;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (TLI)
+ if (TLI) {
+ unsigned AS = SI->getPointerAddressSpace();
return OptimizeMemoryInst(I, SI->getOperand(1),
- SI->getOperand(0)->getType());
+ SI->getOperand(0)->getType(), AS);
+ }
return false;
}
@@ -4263,34 +4457,6 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
if (isa<ExtractElementInst>(I))
return OptimizeExtractElementInst(I);
- if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- if (TLI && BI->isConditional() && BI->getCondition()->hasOneUse()) {
- // Check if the branch condition compares a value agaist zero.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ &&
- match(ICI->getOperand(1), m_Zero())) {
- BasicBlock *ThenBB = BI->getSuccessor(1);
- BasicBlock *EndBB = BI->getSuccessor(0);
-
- // Check if ThenBB is only reachable from this basic block; also,
- // check if EndBB has more than one predecessor.
- if (ThenBB->getSinglePredecessor() &&
- !EndBB->getSinglePredecessor()) {
- TerminatorInst *TI = ThenBB->getTerminator();
-
- if (TI->getNumSuccessors() == 1 && TI->getSuccessor(0) == EndBB &&
- // Try to speculate calls to intrinsic cttz/ctlz from 'ThenBB'.
- OptimizeBranchInst(BI, *TLI)) {
- ModifiedDT = true;
- return true;
- }
- }
- }
- }
- }
- return false;
- }
-
return false;
}
@@ -4469,8 +4635,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
bool CodeGenPrepare::splitBranchCondition(Function &F) {
- if (!TM || TM->Options.EnableFastISel != true ||
- !TLI || TLI->isJumpExpensive())
+ if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
return false;
bool MadeChange = false;
@@ -4631,10 +4796,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
}
- // Request DOM Tree update.
// Note: No point in getting fancy here, since the DT info is never
- // available to CodeGenPrepare and the existing update code is broken
- // anyways.
+ // available to CodeGenPrepare.
ModifiedDT = true;
MadeChange = true;