Diffstat (limited to 'contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r-- | contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 821
1 file changed, 492 insertions, 329 deletions
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 82cd380..6a81403 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
@@ -32,13 +33,13 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -72,6 +73,10 @@ static cl::opt<bool> DisableBranchOpts(
     "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
     cl::desc("Disable branch optimizations in CodeGenPrepare"));
 
+static cl::opt<bool>
+    DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
+                  cl::desc("Disable GC optimizations in CodeGenPrepare"));
+
 static cl::opt<bool> DisableSelectToBranch(
     "disable-cgp-select2branch", cl::Hidden, cl::init(false),
     cl::desc("Disable select to branch conversion."));
@@ -119,7 +124,6 @@ class TypePromotionTransaction;
     const TargetLowering *TLI;
     const TargetTransformInfo *TTI;
     const TargetLibraryInfo *TLInfo;
-    DominatorTree *DT;
 
     /// CurInstIterator - As we scan instructions optimizing them, this is the
     /// next instruction to optimize. Xforms that can invalidate this should
@@ -137,8 +141,7 @@ class TypePromotionTransaction;
     /// promotion for the current function.
     InstrToOrigTy PromotedInsts;
 
-    /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
-    /// be updated.
+    /// ModifiedDT - If CFG is modified in anyway.
     bool ModifiedDT;
 
     /// OptSize - True if optimizing for size.
@@ -156,8 +159,8 @@ class TypePromotionTransaction; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<TargetLibraryInfo>(); - AU.addRequired<TargetTransformInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } private: @@ -167,7 +170,8 @@ class TypePromotionTransaction; void EliminateMostlyEmptyBlock(BasicBlock *BB); bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT); bool OptimizeInst(Instruction *I, bool& ModifiedDT); - bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); + bool OptimizeMemoryInst(Instruction *I, Value *Addr, + Type *AccessTy, unsigned AS); bool OptimizeInlineAsmInst(CallInst *CS); bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT); bool MoveExtToFormExtLoad(Instruction *&I); @@ -181,8 +185,9 @@ class TypePromotionTransaction; bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, - unsigned CreatedInst); + unsigned CreatedInstCost); bool splitBranchCondition(Function &F); + bool simplifyOffsetableRelocate(Instruction &I); }; } @@ -205,14 +210,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) { ModifiedDT = false; if (TM) - TLI = TM->getSubtargetImpl()->getTargetLowering(); - TLInfo = &getAnalysis<TargetLibraryInfo>(); - TTI = &getAnalysis<TargetTransformInfo>(); - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; - OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize); + TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + OptSize = F.hasFnAttribute(Attribute::OptimizeForSize); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. @@ -248,9 +249,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BasicBlock *BB = I++; bool ModifiedDTOnIteration = false; MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); - + // Restart BB iteration if the dominator tree of the Function was changed - ModifiedDT |= ModifiedDTOnIteration; if (ModifiedDTOnIteration) break; } @@ -293,13 +293,18 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (EverMadeChange || MadeChange) MadeChange |= EliminateFallThrough(F); - if (MadeChange) - ModifiedDT = true; EverMadeChange |= MadeChange; } - if (ModifiedDT && DT) - DT->recalculate(F); + if (!DisableGCOpts) { + SmallVector<Instruction *, 2> Statepoints; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (isStatepoint(I)) + Statepoints.push_back(&I); + for (auto &I : Statepoints) + EverMadeChange |= simplifyOffsetableRelocate(*I); + } return EverMadeChange; } @@ -326,7 +331,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { // Remember if SinglePred was the entry block of the function. // If so, we will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(BB, this); + MergeBasicBlockIntoOnlyPred(BB, nullptr); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -466,7 +471,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. 
If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB, this); + MergeBasicBlockIntoOnlyPred(DestBB, nullptr); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -508,19 +513,188 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (DT && !ModifiedDT) { - BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); - BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); - BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); - DT->changeImmediateDominator(DestBB, NewIDom); - DT->eraseNode(BB); - } BB->eraseFromParent(); ++NumBlocksElim; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } +// Computes a map of base pointer relocation instructions to corresponding +// derived pointer relocation instructions given a vector of all relocate calls +static void computeBaseDerivedRelocateMap( + const SmallVectorImpl<User *> &AllRelocateCalls, + DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> & + RelocateInstMap) { + // Collect information in two maps: one primarily for locating the base object + // while filling the second map; the second map is the final structure holding + // a mapping between Base and corresponding Derived relocate calls + DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap; + for (auto &U : AllRelocateCalls) { + GCRelocateOperands ThisRelocate(U); + IntrinsicInst *I = cast<IntrinsicInst>(U); + auto K = std::make_pair(ThisRelocate.getBasePtrIndex(), + ThisRelocate.getDerivedPtrIndex()); + RelocateIdxMap.insert(std::make_pair(K, I)); + } + for (auto &Item : RelocateIdxMap) { + std::pair<unsigned, unsigned> Key = Item.first; + if (Key.first == Key.second) + // Base relocation: nothing to insert + continue; + + IntrinsicInst *I = Item.second; + auto BaseKey = std::make_pair(Key.first, Key.first); + + // We're iterating over RelocateIdxMap so we cannot modify it. + auto MaybeBase = RelocateIdxMap.find(BaseKey); + if (MaybeBase == RelocateIdxMap.end()) + // TODO: We might want to insert a new base object relocate and gep off + // that, if there are enough derived object relocates. + continue; + + RelocateInstMap[MaybeBase->second].push_back(I); + } +} + +// Accepts a GEP and extracts the operands into a vector provided they're all +// small integer constants +static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, + SmallVectorImpl<Value *> &OffsetV) { + for (unsigned i = 1; i < GEP->getNumOperands(); i++) { + // Only accept small constant integer operands + auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!Op || Op->getZExtValue() > 20) + return false; + } + + for (unsigned i = 1; i < GEP->getNumOperands(); i++) + OffsetV.push_back(GEP->getOperand(i)); + return true; +} + +// Takes a RelocatedBase (base pointer relocation instruction) and Targets to +// replace, computes a replacement, and affects it. 
+static bool +simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, + const SmallVectorImpl<IntrinsicInst *> &Targets) { + bool MadeChange = false; + for (auto &ToReplace : Targets) { + GCRelocateOperands MasterRelocate(RelocatedBase); + GCRelocateOperands ThisRelocate(ToReplace); + + assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() && + "Not relocating a derived object of the original base object"); + if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) { + // A duplicate relocate call. TODO: coalesce duplicates. + continue; + } + + Value *Base = ThisRelocate.getBasePtr(); + auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr()); + if (!Derived || Derived->getPointerOperand() != Base) + continue; + + SmallVector<Value *, 2> OffsetV; + if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) + continue; + + // Create a Builder and replace the target callsite with a gep + assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator"); + + // Insert after RelocatedBase + IRBuilder<> Builder(RelocatedBase->getNextNode()); + Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); + + // If gc_relocate does not match the actual type, cast it to the right type. + // In theory, there must be a bitcast after gc_relocate if the type does not + // match, and we should reuse it to get the derived pointer. But it could be + // cases like this: + // bb1: + // ... + // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) + // br label %merge + // + // bb2: + // ... + // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) + // br label %merge + // + // merge: + // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] + // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)* + // + // In this case, we can not find the bitcast any more. So we insert a new bitcast + // no matter there is already one or not. In this way, we can handle all cases, and + // the extra bitcast should be optimized away in later passes. + Instruction *ActualRelocatedBase = RelocatedBase; + if (RelocatedBase->getType() != Base->getType()) { + ActualRelocatedBase = + cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType())); + } + Value *Replacement = Builder.CreateGEP( + Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); + Instruction *ReplacementInst = cast<Instruction>(Replacement); + Replacement->takeName(ToReplace); + // If the newly generated derived pointer's type does not match the original derived + // pointer's type, cast the new derived pointer to match it. Same reasoning as above. + Instruction *ActualReplacement = ReplacementInst; + if (ReplacementInst->getType() != ToReplace->getType()) { + ActualReplacement = + cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType())); + } + ToReplace->replaceAllUsesWith(ActualReplacement); + ToReplace->eraseFromParent(); + + MadeChange = true; + } + return MadeChange; +} + +// Turns this: +// +// %base = ... +// %ptr = gep %base + 15 +// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) +// %base' = relocate(%tok, i32 4, i32 4) +// %ptr' = relocate(%tok, i32 4, i32 5) +// %val = load %ptr' +// +// into this: +// +// %base = ... 
+// %ptr = gep %base + 15 +// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) +// %base' = gc.relocate(%tok, i32 4, i32 4) +// %ptr' = gep %base' + 15 +// %val = load %ptr' +bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { + bool MadeChange = false; + SmallVector<User *, 2> AllRelocateCalls; + + for (auto *U : I.users()) + if (isGCRelocate(dyn_cast<Instruction>(U))) + // Collect all the relocate calls associated with a statepoint + AllRelocateCalls.push_back(U); + + // We need atleast one base pointer relocation + one derived pointer + // relocation to mangle + if (AllRelocateCalls.size() < 2) + return false; + + // RelocateInstMap is a mapping from the base relocate instruction to the + // corresponding derived relocate instructions + DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap; + computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); + if (RelocateInstMap.empty()) + return false; + + for (auto &Item : RelocateInstMap) + // Item.first is the RelocatedBase to offset against + // Item.second is the vector of Targets to replace + MadeChange = simplifyRelocatesOffABase(Item.first, Item.second); + return MadeChange; +} + /// SinkCast - Sink the specified cast instruction into its user blocks static bool SinkCast(CastInst *CI) { BasicBlock *DefBB = CI->getParent(); @@ -555,11 +729,11 @@ static bool SinkCast(CastInst *CI) { InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", InsertPt); - MadeChange = true; } // Replace a use of the cast with a use of the new cast. TheUse = InsertedCast; + MadeChange = true; ++NumCastUses; } @@ -609,13 +783,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ return SinkCast(CI); } -/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce +/// CombineUAddWithOverflow - try to combine CI into a call to the +/// llvm.uadd.with.overflow intrinsic if possible. +/// +/// Return true if any changes were made. +static bool CombineUAddWithOverflow(CmpInst *CI) { + Value *A, *B; + Instruction *AddI; + if (!match(CI, + m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) + return false; + + Type *Ty = AddI->getType(); + if (!isa<IntegerType>(Ty)) + return false; + + // We don't want to move around uses of condition values this late, so we we + // check if it is legal to create the call to the intrinsic in the basic + // block containing the icmp: + + if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse()) + return false; + +#ifndef NDEBUG + // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption + // for now: + if (AddI->hasOneUse()) + assert(*AddI->user_begin() == CI && "expected!"); +#endif + + Module *M = CI->getParent()->getParent()->getParent(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); + + auto *InsertPt = AddI->hasOneUse() ? CI : AddI; + + auto *UAddWithOverflow = + CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt); + auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt); + auto *Overflow = + ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt); + + CI->replaceAllUsesWith(Overflow); + AddI->replaceAllUsesWith(UAdd); + CI->eraseFromParent(); + AddI->eraseFromParent(); + return true; +} + +/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce /// the number of virtual registers that must be created and coalesced. 
This is /// a clear win except on targets with multiple condition code registers /// (PowerPC), where it might lose; some adjustment may be wanted there. /// /// Return true if any changes are made. -static bool OptimizeCmpExpression(CmpInst *CI) { +static bool SinkCmpExpression(CmpInst *CI) { BasicBlock *DefBB = CI->getParent(); /// InsertedCmp - Only insert a cmp in each block once. @@ -649,21 +870,33 @@ static bool OptimizeCmpExpression(CmpInst *CI) { CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), CI->getOperand(1), "", InsertPt); - MadeChange = true; } // Replace a use of the cmp with a use of the new cmp. TheUse = InsertedCmp; + MadeChange = true; ++NumCmpUses; } // If we removed all uses, nuke the cmp. - if (CI->use_empty()) + if (CI->use_empty()) { CI->eraseFromParent(); + MadeChange = true; + } return MadeChange; } +static bool OptimizeCmpExpression(CmpInst *CI) { + if (SinkCmpExpression(CI)) + return true; + + if (CombineUAddWithOverflow(CI)) + return true; + + return false; +} + /// isExtractBitsCandidateUse - Check if the candidates could /// be combined with shift instruction, which includes: /// 1. Truncate instruction @@ -943,8 +1176,9 @@ static void ScalarizeMaskedLoad(CallInst *CI) { // CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); Builder.SetInsertPoint(InsertPt); - - Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx)); + + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); LoadInst* Load = Builder.CreateLoad(Gep, false); VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); @@ -1038,7 +1272,8 @@ static void ScalarizeMaskedStore(CallInst *CI) { Builder.SetInsertPoint(InsertPt); Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); Builder.CreateStore(OneElt, Gep); // Create "else" block, fill it in the next iteration @@ -1072,6 +1307,54 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return true; } + const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; + + // Align the pointer arguments to this call if the target thinks it's a good + // idea + unsigned MinSize, PrefAlign; + if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { + for (auto &Arg : CI->arg_operands()) { + // We want to align both objects whose address is used directly and + // objects whose address is used in casts and GEPs, though it only makes + // sense for GEPs if the offset is a multiple of the desired alignment and + // if size - offset meets the size threshold. + if (!Arg->getType()->isPointerTy()) + continue; + APInt Offset(TD->getPointerSizeInBits( + cast<PointerType>(Arg->getType())->getAddressSpace()), 0); + Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset); + uint64_t Offset2 = Offset.getLimitedValue(); + if ((Offset2 & (PrefAlign-1)) != 0) + continue; + AllocaInst *AI; + if ((AI = dyn_cast<AllocaInst>(Val)) && + AI->getAlignment() < PrefAlign && + TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) + AI->setAlignment(PrefAlign); + // Global variables can only be aligned if they are defined in this + // object (i.e. they are uniquely initialized in this object), and + // over-aligning global variables that have an explicit section is + // forbidden. 
+ GlobalVariable *GV; + if ((GV = dyn_cast<GlobalVariable>(Val)) && + GV->hasUniqueInitializer() && + !GV->hasSection() && + GV->getAlignment() < PrefAlign && + TD->getTypeAllocSize( + GV->getType()->getElementType()) >= MinSize + Offset2) + GV->setAlignment(PrefAlign); + } + // If this is a memcpy (or similar) then we may be able to improve the + // alignment + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { + unsigned Align = getKnownAlignment(MI->getDest(), *TD); + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) + Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD)); + if (Align > MI->getAlignment()) + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); + } + } + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { switch (II->getIntrinsicID()) { @@ -1088,8 +1371,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { WeakVH IterHandle(CurInstIterator); replaceAndRecursivelySimplify(CI, RetVal, - TLI ? TLI->getDataLayout() : nullptr, - TLInfo, ModifiedDT ? nullptr : DT); + TLInfo, nullptr); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -1116,14 +1398,28 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { } return false; } + case Intrinsic::aarch64_stlxr: + case Intrinsic::aarch64_stxr: { + ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); + if (!ExtVal || !ExtVal->hasOneUse() || + ExtVal->getParent() == CI->getParent()) + return false; + // Sink a zext feeding stlxr/stxr before it, so it can be folded into it. + ExtVal->moveBefore(CI); + return true; + } } if (TLI) { + // Unknown address space. + // TODO: Target hook to pick which address space the intrinsic cares + // about? + unsigned AddrSpace = ~0u; SmallVector<Value*, 2> PtrOps; Type *AccessTy; - if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) + if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace)) while (!PtrOps.empty()) - if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) + if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) return true; } } @@ -1131,15 +1427,11 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { // From here on out we're working with named functions. if (!CI->getCalledFunction()) return false; - // We'll need DataLayout from here on out. - const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; - if (!TD) return false; - // Lower all default uses of _chk calls. This is very similar // to what InstCombineCalls does, but here we are only lowering calls // to fortified library functions (e.g. __memcpy_chk) that have the default // "don't know" as the objectsize. Anything else should be left alone. - FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true); + FortifiedLibCallSimplifier Simplifier(TLInfo, true); if (Value *V = Simplifier.optimizeCall(CI)) { CI->replaceAllUsesWith(V); CI->eraseFromParent(); @@ -1672,7 +1964,7 @@ class TypePromotionTransaction { Inst->removeFromParent(); } - ~InstructionRemover() { delete Replacer; } + ~InstructionRemover() override { delete Replacer; } /// \brief Really remove the instruction. void commit() override { delete Inst; } @@ -1802,11 +2094,13 @@ void TypePromotionTransaction::rollback( /// This encapsulates the logic for matching the target-legal addressing modes. 
class AddressingModeMatcher { SmallVectorImpl<Instruction*> &AddrModeInsts; + const TargetMachine &TM; const TargetLowering &TLI; /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and /// the memory instruction that we're computing this address for. Type *AccessTy; + unsigned AddrSpace; Instruction *MemoryInst; /// AddrMode - This is the addressing mode that we're building up. This is @@ -1825,13 +2119,16 @@ class AddressingModeMatcher { /// always returns true. bool IgnoreProfitability; - AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, - const TargetLowering &T, Type *AT, + AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, + const TargetMachine &TM, Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) - : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM), + : AddrModeInsts(AMI), TM(TM), + TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent()) + ->getTargetLowering()), + AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { IgnoreProfitability = false; } @@ -1845,16 +2142,16 @@ public: /// optimizations. /// \p PromotedInsts maps the instructions to their type before promotion. /// \p The ongoing transaction where every action should be registered. - static ExtAddrMode Match(Value *V, Type *AccessTy, + static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst, SmallVectorImpl<Instruction*> &AddrModeInsts, - const TargetLowering &TLI, + const TargetMachine &TM, const SetOfInstrs &InsertedTruncs, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) { ExtAddrMode Result; - bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, + bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS, MemoryInst, Result, InsertedTruncs, PromotedInsts, TPT).MatchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -1869,7 +2166,7 @@ private: ExtAddrMode &AMBefore, ExtAddrMode &AMAfter); bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); - bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, + bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const; }; @@ -1900,7 +2197,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, TestAddrMode.ScaledReg = ScaleReg; // If the new address isn't legal, bail out. - if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) + if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) return false; // It was legal, so commit it. @@ -1917,7 +2214,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, // If this addressing mode is legal, commit it and remember that we folded // this instruction. - if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { + if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) { AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); AddrMode = TestAddrMode; return true; @@ -2003,7 +2300,7 @@ class TypePromotionHelper { /// \brief Utility function to promote the operand of \p Ext when this /// operand is a promotable trunc or sext or zext. /// \p PromotedInsts maps the instructions to their type before promotion. 
- /// \p CreatedInsts[out] contains how many non-free instructions have been + /// \p CreatedInstsCost[out] contains the cost of all instructions /// created to promote the operand of Ext. /// Newly added extensions are inserted in \p Exts. /// Newly added truncates are inserted in \p Truncs. @@ -2011,53 +2308,55 @@ class TypePromotionHelper { /// \return The promoted value which is used instead of Ext. static Value *promoteOperandForTruncAndAnyExt( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs); + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); /// \brief Utility function to promote the operand of \p Ext when this /// operand is promotable and is not a supported trunc or sext. /// \p PromotedInsts maps the instructions to their type before promotion. - /// \p CreatedInsts[out] contains how many non-free instructions have been + /// \p CreatedInstsCost[out] contains the cost of all the instructions /// created to promote the operand of Ext. /// Newly added extensions are inserted in \p Exts. /// Newly added truncates are inserted in \p Truncs. /// Should never be called directly. /// \return The promoted value which is used instead of Ext. - static Value * - promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, bool IsSExt); + static Value *promoteOperandForOther(Instruction *Ext, + TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI, bool IsSExt); /// \see promoteOperandForOther. - static Value * - signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, - Truncs, true); + static Value *signExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, true); } /// \see promoteOperandForOther. - static Value * - zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, - Truncs, false); + static Value *zeroExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, false); } public: /// Type for the utility function that promotes the operand of Ext. 
typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs); + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI); /// \brief Given a sign/zero extend instruction \p Ext, return the approriate /// action to promote the operand of \p Ext instead of using Ext. /// \return NULL if no promotable action is possible with the current @@ -2174,16 +2473,18 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( llvm::Instruction *SExt, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { // By construction, the operand of SExt is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); Value *ExtVal = SExt; + bool HasMergedNonFreeExt = false; if (isa<ZExtInst>(SExtOpnd)) { // Replace s|zext(zext(opnd)) // => zext(opnd). + HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); Value *ZExt = TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); TPT.replaceAllUsesWith(SExt, ZExt); @@ -2194,7 +2495,7 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // => z|sext(opnd). TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); } - CreatedInsts = 0; + CreatedInstsCost = 0; // Remove dead code. if (SExtOpnd->use_empty()) @@ -2203,8 +2504,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // Check if the extension is still needed. Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { - if (ExtInst && Exts) - Exts->push_back(ExtInst); + if (ExtInst) { + if (Exts) + Exts->push_back(ExtInst); + CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; + } return ExtVal; } @@ -2217,13 +2521,14 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( Value *TypePromotionHelper::promoteOperandForOther( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) { + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI, + bool IsSExt) { // By construction, the operand of Ext is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); - CreatedInsts = 0; + CreatedInstsCost = 0; if (!ExtOpnd->hasOneUse()) { // ExtOpnd will be promoted. // All its uses, but Ext, will need to use a truncated value of the @@ -2298,7 +2603,6 @@ Value *TypePromotionHelper::promoteOperandForOther( continue; } ExtForOpnd = cast<Instruction>(ValForExtOpnd); - ++CreatedInsts; } if (Exts) Exts->push_back(ExtForOpnd); @@ -2307,6 +2611,7 @@ Value *TypePromotionHelper::promoteOperandForOther( // Move the sign extension before the insertion point. 
TPT.moveBefore(ExtForOpnd, ExtOpnd); TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); + CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); // If more sext are required, new instructions will have to be created. ExtForOpnd = nullptr; } @@ -2319,22 +2624,22 @@ Value *TypePromotionHelper::promoteOperandForOther( /// IsPromotionProfitable - Check whether or not promoting an instruction /// to a wider type was profitable. -/// \p MatchedSize gives the number of instructions that have been matched -/// in the addressing mode after the promotion was applied. -/// \p SizeWithPromotion gives the number of created instructions for -/// the promotion plus the number of instructions that have been -/// matched in the addressing mode before the promotion. +/// \p NewCost gives the cost of extension instructions created by the +/// promotion. +/// \p OldCost gives the cost of extension instructions before the promotion +/// plus the number of instructions that have been +/// matched in the addressing mode the promotion. /// \p PromotedOperand is the value that has been promoted. /// \return True if the promotion is profitable, false otherwise. -bool -AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, - unsigned SizeWithPromotion, - Value *PromotedOperand) const { - // We folded less instructions than what we created to promote the operand. +bool AddressingModeMatcher::IsPromotionProfitable( + unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { + DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); + // The cost of the new extensions is greater than the cost of the + // old extension plus what we folded. // This is not profitable. - if (MatchedSize < SizeWithPromotion) + if (NewCost > OldCost) return false; - if (MatchedSize > SizeWithPromotion) + if (NewCost < OldCost) return true; // The promotion is neutral but it may help folding the sign extension in // loads for instance. @@ -2374,7 +2679,6 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::BitCast: - case Instruction::AddrSpaceCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). if ((AddrInst->getOperand(0)->getType()->isPointerTy() || @@ -2385,6 +2689,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, AddrInst->getOperand(0)->getType() != AddrInst->getType()) return MatchAddr(AddrInst->getOperand(0), Depth); return false; + case Instruction::AddrSpaceCast: { + unsigned SrcAS + = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); + unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); + if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + } case Instruction::Add: { // Check to see if we can merge in the RHS then the LHS. If so, we win. ExtAddrMode BackupAddrMode = AddrMode; @@ -2466,7 +2778,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, // just add it to the disp field and check validity. if (VariableOperand == -1) { AddrMode.BaseOffs += ConstantOffset; - if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ + if (ConstantOffset == 0 || + TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) { // Check to see if we can fold the base pointer in too. 
if (MatchAddr(AddrInst->getOperand(0), Depth+1)) return true; @@ -2532,9 +2845,10 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - unsigned CreatedInsts = 0; + unsigned CreatedInstsCost = 0; + unsigned ExtCost = !TLI.isExtFree(Ext); Value *PromotedOperand = - TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr); + TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); // SExt has been moved away. // Thus either it will be rematched later in the recursive calls or it is // gone. Anyway, we must not fold it into the addressing mode at this point. @@ -2556,7 +2870,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, unsigned OldSize = AddrModeInsts.size(); if (!MatchAddr(PromotedOperand, Depth) || - !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, + // The total of the new cost is equals to the cost of the created + // instructions. + // The total of the old cost is equals to the cost of the extension plus + // what we have saved in the addressing mode. + !IsPromotionProfitable(CreatedInstsCost, + ExtCost + (AddrModeInsts.size() - OldSize), PromotedOperand)) { AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); @@ -2583,14 +2902,14 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { // Fold in immediates if legal for the target. AddrMode.BaseOffs += CI->getSExtValue(); - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) return true; AddrMode.BaseOffs -= CI->getSExtValue(); } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { // If this is a global variable, try to fold it into the addressing mode. if (!AddrMode.BaseGV) { AddrMode.BaseGV = GV; - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) return true; AddrMode.BaseGV = nullptr; } @@ -2634,7 +2953,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { AddrMode.HasBaseReg = true; AddrMode.BaseReg = Addr; // Still check for legality in case the target supports [imm] but not [i+r]. - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) return true; AddrMode.HasBaseReg = false; AddrMode.BaseReg = nullptr; @@ -2644,7 +2963,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (AddrMode.Scale == 0) { AddrMode.Scale = 1; AddrMode.ScaledReg = Addr; - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) return true; AddrMode.Scale = 0; AddrMode.ScaledReg = nullptr; @@ -2658,13 +2977,17 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { /// inline asm call are due to memory operands. If so, return true, otherwise /// return false. 
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, - const TargetLowering &TLI) { - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); + const TargetMachine &TM) { + const Function *F = CI->getParent()->getParent(); + const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo(); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI->ParseConstraints(TRI, ImmutableCallSite(CI)); for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); // If this asm operand is our Value*, and if it isn't an indirect memory // operand, we can't fold it! @@ -2680,10 +3003,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, /// FindAllMemoryUses - Recursively walk all the uses of I until we find a /// memory use. If we find an obviously non-foldable instruction, return true. /// Add the ultimately found memory instructions to MemoryUses. -static bool FindAllMemoryUses(Instruction *I, - SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, - SmallPtrSetImpl<Instruction*> &ConsideredInsts, - const TargetLowering &TLI) { +static bool FindAllMemoryUses( + Instruction *I, + SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, + SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) { // If we already considered this instruction, we're done. if (!ConsideredInsts.insert(I).second) return false; @@ -2713,12 +3036,12 @@ static bool FindAllMemoryUses(Instruction *I, if (!IA) return true; // If this is a memory operand, we're cool, otherwise bail out. - if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) + if (!IsOperandAMemoryOperand(CI, IA, I, TM)) return true; continue; } - if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM)) return true; } @@ -2806,7 +3129,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // uses. SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; SmallPtrSet<Instruction*, 16> ConsideredInsts; - if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM)) return false; // Has a non-memory, non-foldable use! // Now that we know that all uses of this instruction are part of a chain of @@ -2821,9 +3144,11 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // Get the access type of this use. If the use isn't a pointer, we don't // know what it accesses. Value *Address = User->getOperand(OpNo); - if (!Address->getType()->isPointerTy()) + PointerType *AddrTy = dyn_cast<PointerType>(Address->getType()); + if (!AddrTy) return false; - Type *AddressAccessTy = Address->getType()->getPointerElementType(); + Type *AddressAccessTy = AddrTy->getElementType(); + unsigned AS = AddrTy->getAddressSpace(); // Do a match against the root of this address, ignoring profitability. 
This // will tell us if the addressing mode for the memory operation will @@ -2831,7 +3156,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode Result; TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS, MemoryInst, Result, InsertedTruncs, PromotedInsts, TPT); Matcher.IgnoreProfitability = true; @@ -2874,7 +3199,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { /// This method is used to optimize both load/store and inline asms with memory /// operands. bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - Type *AccessTy) { + Type *AccessTy, unsigned AddrSpace) { Value *Repl = Addr; // Try to collapse single-value PHI nodes. This is necessary to undo @@ -2906,16 +3231,16 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // For a PHI node, push all of its incoming values. if (PHINode *P = dyn_cast<PHINode>(V)) { - for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) - worklist.push_back(P->getIncomingValue(i)); + for (Value *IncValue : P->incoming_values()) + worklist.push_back(IncValue); continue; } // For non-PHIs, determine the addressing mode being computed. SmallVector<Instruction*, 16> NewAddrModeInsts; ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( - V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet, - PromotedInsts, TPT); + V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM, + InsertedTruncsSet, PromotedInsts, TPT); // This check is broken into two cases with very similar code to avoid using // getNumUses() as much as possible. Some values have a lot of uses, so @@ -2989,8 +3314,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); - } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && - TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) { + } else if (AddrSinkUsingGEPs || + (!AddrSinkUsingGEPs.getNumOccurrences() && TM && + TM->getSubtargetImpl(*MemoryInst->getParent()->getParent()) + ->useAA())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " @@ -3041,7 +3368,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return false; } else { Type *I8PtrTy = - Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Type *I8Ty = Builder.getInt8Ty(); // Start with the base register. Do this first so that subsequent address // matching finds it last, which will prevent it from trying to match it @@ -3093,7 +3421,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // SDAG consecutive load/store merging. 
if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); - ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } ResultIndex = V; @@ -3104,7 +3432,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); - SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } if (SunkAddr->getType() != Addr->getType()) @@ -3213,8 +3541,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; + const TargetRegisterInfo *TRI = + TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI->ParseConstraints(CS); + TargetConstraints = TLI->ParseConstraints(TRI, CS); unsigned ArgNo = 0; for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; @@ -3225,7 +3555,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.isIndirect) { Value *OpVal = CS->getArgOperand(ArgNo++); - MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType()); + MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); } else if (OpInfo.Type == InlineAsm::isInput) ArgNo++; } @@ -3308,7 +3638,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, - unsigned CreatedInsts = 0) { + unsigned CreatedInstsCost = 0) { // Iterate over all the extensions to see if one form an ext(load). for (auto I : Exts) { // Check if we directly have ext(load). @@ -3330,10 +3660,11 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); SmallVector<Instruction *, 4> NewExts; - unsigned NewCreatedInsts = 0; + unsigned NewCreatedInstsCost = 0; + unsigned ExtCost = !TLI->isExtFree(I); // Promote. - Value *PromotedVal = - TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr); + Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, + &NewExts, nullptr, *TLI); assert(PromotedVal && "TypePromotionHelper should have filtered out those cases"); @@ -3343,9 +3674,10 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, // With exactly 2, the transformation is neutral, because we will merge // one extension but leave one. However, we optimistically keep going, // because the new extension may be removed too. - unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts; + long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; + TotalCreatedInstsCost -= ExtCost; if (!StressExtLdPromotion && - (TotalCreatedInsts > 1 || + (TotalCreatedInstsCost > 1 || !isPromotedInstructionLegal(*TLI, PromotedVal))) { // The promotion is not profitable, rollback to the previous state. TPT.rollback(LastKnownGood); @@ -3353,8 +3685,8 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, } // The promotion is profitable. // Check if it exposes an ext(load). 
- (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts); - if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 || + (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); + if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || // If we have created a new extension, i.e., now we have two // extensions. We must make sure one of them is merged with // the load, otherwise we may degrade the code quality. @@ -3969,148 +4301,6 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); } -// See if we can speculate calls to intrinsic cttz/ctlz. -// -// Example: -// entry: -// ... -// %cmp = icmp eq i64 %val, 0 -// br i1 %cmp, label %end.bb, label %then.bb -// -// then.bb: -// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) -// br label %EndBB -// -// end.bb: -// %cond = phi i64 [ %c, %then.bb ], [ 64, %entry ] -// -// ==> -// -// entry: -// ... -// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 false) -// -static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) { - assert(BrInst->isConditional() && "Expected a conditional branch!"); - BasicBlock *ThenBB = BrInst->getSuccessor(1); - BasicBlock *EndBB = BrInst->getSuccessor(0); - - // See if ThenBB contains only one instruction (excluding the - // terminator and DbgInfoIntrinsic calls). - IntrinsicInst *II = nullptr; - CastInst *CI = nullptr; - for (BasicBlock::iterator I = ThenBB->begin(), - E = std::prev(ThenBB->end()); I != E; ++I) { - // Skip debug info. - if (isa<DbgInfoIntrinsic>(I)) - continue; - - // Check if this is a zero extension or a truncate of a previously - // matched call to intrinsic cttz/ctlz. - if (II) { - // Early exit if we already found a "free" zero extend/truncate. - if (CI) - return false; - - Type *SrcTy = II->getType(); - Type *DestTy = I->getType(); - Value *V; - - if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) { - // Speculate this zero extend only if it is "free" for the target. - if (TLI.isZExtFree(SrcTy, DestTy)) { - CI = cast<CastInst>(I); - continue; - } - } else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) { - // Speculate this truncate only if it is "free" for the target. - if (TLI.isTruncateFree(SrcTy, DestTy)) { - CI = cast<CastInst>(I); - continue; - } - } else { - // Avoid speculating more than one instruction. - return false; - } - } - - // See if this is a call to intrinsic cttz/ctlz. - if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) { - // Avoid speculating expensive intrinsic calls. - if (!TLI.isCheapToSpeculateCttz()) - return false; - } - else if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::ctlz>())) { - // Avoid speculating expensive intrinsic calls. - if (!TLI.isCheapToSpeculateCtlz()) - return false; - } else - return false; - - II = cast<IntrinsicInst>(I); - } - - // Look for PHI nodes with 'II' as the incoming value from 'ThenBB'. 
- BasicBlock *EntryBB = BrInst->getParent(); - for (BasicBlock::iterator I = EndBB->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) { - Value *ThenV = PN->getIncomingValueForBlock(ThenBB); - Value *OrigV = PN->getIncomingValueForBlock(EntryBB); - - if (!OrigV) - return false; - - if (ThenV != II && (!CI || ThenV != CI)) - return false; - - if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) { - unsigned BitWidth = II->getType()->getIntegerBitWidth(); - - // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz - // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits - // of the value in input to the cttz/ctlz. - if (CInt->getValue() != BitWidth) - return false; - - // Hoist the call to cttz/ctlz from ThenBB into EntryBB. - EntryBB->getInstList().splice(BrInst, ThenBB->getInstList(), - ThenBB->begin(), std::prev(ThenBB->end())); - - // Update PN setting ThenV as the incoming value from both 'EntryBB' - // and 'ThenBB'. Eventually, method 'OptimizeInst' will fold this - // phi node if all the incoming values are the same. - PN->setIncomingValue(PN->getBasicBlockIndex(EntryBB), ThenV); - PN->setIncomingValue(PN->getBasicBlockIndex(ThenBB), ThenV); - - // Clear the 'undef on zero' flag of the cttz/ctlz intrinsic call. - if (cast<ConstantInt>(II->getArgOperand(1))->isOne()) { - Type *Ty = II->getArgOperand(0)->getType(); - Value *Args[] = { II->getArgOperand(0), - ConstantInt::getFalse(II->getContext()) }; - Module *M = EntryBB->getParent()->getParent(); - Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty); - IRBuilder<> Builder(II); - Instruction *NewI = Builder.CreateCall(IF, Args); - - // Replace the old call to cttz/ctlz. - II->replaceAllUsesWith(NewI); - II->eraseFromParent(); - } - - // Update BrInst condition so that the branch to EndBB is always taken. - // Later on, method 'ConstantFoldTerminator' will simplify this branch - // replacing it with a direct branch to 'EndBB'. - // As a side effect, CodeGenPrepare will attempt to simplify the control - // flow graph by deleting basic block 'ThenBB' and merging 'EntryBB' into - // 'EndBB' (calling method 'EliminateFallThrough'). - BrInst->setCondition(ConstantInt::getTrue(BrInst->getContext())); - return true; - } - } - - return false; -} - /// Some targets can do store(extractelement) with one instruction. /// Try to push the extractelement towards the stores when the target /// has this feature and this is profitable. @@ -4171,8 +4361,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - if (Value *V = SimplifyInstruction(P, TLI ? 
TLI->getDataLayout() : nullptr, - TLInfo, DT)) { + const DataLayout &DL = I->getModule()->getDataLayout(); + if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; @@ -4214,15 +4404,19 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { return OptimizeCmpExpression(CI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (TLI) - return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); + if (TLI) { + unsigned AS = LI->getPointerAddressSpace(); + return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); + } return false; } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (TLI) + if (TLI) { + unsigned AS = SI->getPointerAddressSpace(); return OptimizeMemoryInst(I, SI->getOperand(1), - SI->getOperand(0)->getType()); + SI->getOperand(0)->getType(), AS); + } return false; } @@ -4263,34 +4457,6 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { if (isa<ExtractElementInst>(I)) return OptimizeExtractElementInst(I); - if (BranchInst *BI = dyn_cast<BranchInst>(I)) { - if (TLI && BI->isConditional() && BI->getCondition()->hasOneUse()) { - // Check if the branch condition compares a value agaist zero. - if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { - if (ICI->getPredicate() == ICmpInst::ICMP_EQ && - match(ICI->getOperand(1), m_Zero())) { - BasicBlock *ThenBB = BI->getSuccessor(1); - BasicBlock *EndBB = BI->getSuccessor(0); - - // Check if ThenBB is only reachable from this basic block; also, - // check if EndBB has more than one predecessor. - if (ThenBB->getSinglePredecessor() && - !EndBB->getSinglePredecessor()) { - TerminatorInst *TI = ThenBB->getTerminator(); - - if (TI->getNumSuccessors() == 1 && TI->getSuccessor(0) == EndBB && - // Try to speculate calls to intrinsic cttz/ctlz from 'ThenBB'. - OptimizeBranchInst(BI, *TLI)) { - ModifiedDT = true; - return true; - } - } - } - } - } - return false; - } - return false; } @@ -4469,8 +4635,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. /// bool CodeGenPrepare::splitBranchCondition(Function &F) { - if (!TM || TM->Options.EnableFastISel != true || - !TLI || TLI->isJumpExpensive()) + if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive()) return false; bool MadeChange = false; @@ -4631,10 +4796,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } } - // Request DOM Tree update. // Note: No point in getting fancy here, since the DT info is never - // available to CodeGenPrepare and the existing update code is broken - // anyways. + // available to CodeGenPrepare. ModifiedDT = true; MadeChange = true; |
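
As a rough illustration (not part of the diff above): the CombineUAddWithOverflow helper added in this change targets the canonical unsigned-overflow idiom in which a compare checks whether an addition wrapped. A minimal C++ sketch of that idiom follows; the function name add_and_check is hypothetical and only for illustration.

    // Hypothetical example, not from the LLVM sources. The add and the
    // "sum < a" compare below form the pattern m_UAddWithOverflow is meant
    // to recognize: unsigned wrap-around occurred exactly when sum < a.
    unsigned add_and_check(unsigned a, unsigned b, bool &overflow) {
      unsigned sum = a + b;
      overflow = sum < a;
      return sum;
    }

When the compare and the add qualify, the new code replaces them with a single call to the llvm.uadd.with.overflow intrinsic plus two extractvalue instructions, as created by CombineUAddWithOverflow in the diff.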