Diffstat (limited to 'contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 241 |
1 file changed, 192 insertions, 49 deletions
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d3648e2..a0c239a 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -39,12 +39,16 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
@@ -68,11 +72,6 @@ STATISTIC(NumExpand,  "Number of expansions");
 STATISTIC(NumFactor , "Number of factorizations");
 STATISTIC(NumReassoc , "Number of reassociations");
 
-static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
-                                    cl::init(false),
-                                    cl::desc("Enable unsafe double to float "
-                                             "shrinking for math lib calls"));
-
 // Initialization Routines
 void llvm::initializeInstCombine(PassRegistry &Registry) {
   initializeInstCombinerPass(Registry);
@@ -85,13 +84,18 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
 char InstCombiner::ID = 0;
 INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
                 "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_END(InstCombiner, "instcombine",
                 "Combine redundant instructions", false, false)
 
 void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
+  AU.addRequired<AssumptionCacheTracker>();
   AU.addRequired<TargetLibraryInfo>();
+  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
 }
@@ -390,6 +394,25 @@ static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
                                      Instruction::BinaryOps ROp) {
   if (Instruction::isCommutative(ROp))
     return LeftDistributesOverRight(ROp, LOp);
+
+  switch (LOp) {
+  default:
+    return false;
+  // (X >> Z) & (Y >> Z)  -> (X&Y) >> Z  for all shifts.
+  // (X >> Z) | (Y >> Z)  -> (X|Y) >> Z  for all shifts.
+  // (X >> Z) ^ (Y >> Z)  -> (X^Y) >> Z  for all shifts.
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    switch (ROp) {
+    default:
+      return false;
+    case Instruction::Shl:
+    case Instruction::LShr:
+    case Instruction::AShr:
+      return true;
+    }
+  }
   // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
   // but this requires knowing that the addition does not overflow and other
   // such subtleties.
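A note on the RightDistributesOverLeft hunk just above: the new cases encode the identity that and/or/xor distribute over a common shift amount, (X >> Z) op (Y >> Z) == (X op Y) >> Z, for shl, lshr and ashr alike, because each result bit depends only on the corresponding bits of X and Y. A minimal standalone C++ sketch (not part of the patch) that spot-checks the logical-shift case:

    #include <cassert>
    #include <cstdint>

    // Spot-check: bitwise and/or/xor distribute over a common right-shift
    // amount, which is what the new RightDistributesOverLeft cases claim.
    int main() {
      for (uint32_t X = 0; X < 64; ++X)
        for (uint32_t Y = 0; Y < 64; ++Y)
          for (uint32_t Z = 0; Z < 8; ++Z) {
            assert(((X >> Z) & (Y >> Z)) == ((X & Y) >> Z));
            assert(((X >> Z) | (Y >> Z)) == ((X | Y) >> Z));
            assert(((X >> Z) ^ (Y >> Z)) == ((X ^ Y) >> Z));
          }
      return 0;
    }

The same reasoning covers shl, and ashr as well, since the sign bit of (X op Y) is exactly (sign of X) op (sign of Y).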
@@ -411,26 +434,37 @@ static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) {
 }
 
 /// This function factors binary ops which can be combined using distributive
-/// laws. This also factor SHL as MUL e.g. SHL(X, 2) ==> MUL(X, 4).
+/// laws. It also tries to transform 'Op' based on TopLevelOpcode to enable
+/// factorization, e.g. for ADD(SHL(X, 2), MUL(X, 5)): when called with
+/// TopLevelOpcode == Instruction::Add and Op = SHL(X, 2), it transforms
+/// SHL(X, 2) into MUL(X, 4), i.e. it returns Instruction::Mul with LHS set
+/// to 'X' and RHS to 4.
 static Instruction::BinaryOps
-getBinOpsForFactorization(BinaryOperator *Op, Value *&LHS, Value *&RHS) {
+getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
+                          BinaryOperator *Op, Value *&LHS, Value *&RHS) {
   if (!Op)
     return Instruction::BinaryOpsEnd;
 
-  if (Op->getOpcode() == Instruction::Shl) {
-    if (Constant *CST = dyn_cast<Constant>(Op->getOperand(1))) {
-      // The multiplier is really 1 << CST.
-      RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST);
-      LHS = Op->getOperand(0);
-      return Instruction::Mul;
+  LHS = Op->getOperand(0);
+  RHS = Op->getOperand(1);
+
+  switch (TopLevelOpcode) {
+  default:
+    return Op->getOpcode();
+
+  case Instruction::Add:
+  case Instruction::Sub:
+    if (Op->getOpcode() == Instruction::Shl) {
+      if (Constant *CST = dyn_cast<Constant>(Op->getOperand(1))) {
+        // The multiplier is really 1 << CST.
+        RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST);
+        return Instruction::Mul;
+      }
     }
+    return Op->getOpcode();
   }
 
   // TODO: We can add other conversions e.g. shr => div etc.
-
-  LHS = Op->getOperand(0);
-  RHS = Op->getOperand(1);
-  return Op->getOpcode();
 }
 
 /// This tries to simplify binary operations by factorizing out common terms
@@ -529,8 +563,9 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
 
   // Factorization.
   Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
-  Instruction::BinaryOps LHSOpcode = getBinOpsForFactorization(Op0, A, B);
-  Instruction::BinaryOps RHSOpcode = getBinOpsForFactorization(Op1, C, D);
+  auto TopLevelOpcode = I.getOpcode();
+  auto LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
+  auto RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
 
   // The instruction has the form "(A op' B) op (C op' D)".  Try to factorize
   // a common term.
@@ -552,7 +587,6 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
     return V;
 
   // Expansion.
-  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
  if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
     // The instruction has the form "(A op' B) op C".  See if expanding it out
     // to "(A op C) op' (B op C)" results in simplifications.
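To see what the new TopLevelOpcode parameter buys: for a top-level add or sub, getBinOpsForFactorization now rewrites a shl-by-constant operand as a multiply, so SimplifyUsingDistributiveLaws can factor expressions such as (X << 2) + (X * 5) into X * 9. A standalone sketch of the underlying arithmetic identity (illustrative only, not the pass itself):

    #include <cassert>
    #include <cstdint>

    // (X << 2) is X * 4, so the distributive law gives
    // (X << 2) + (X * 5) == X * 4 + X * 5 == X * 9.
    int main() {
      for (uint32_t X = 0; X < 4096; ++X)
        assert(((X << 2) + X * 5) == X * 9);
      return 0;
    }

The identity holds modulo 2^32 for unsigned wraparound, which matches LLVM's two's-complement integer semantics.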
@@ -765,13 +799,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
       // If the incoming non-constant value is in I's block, we will remove one
       // instruction, but insert another equivalent one, leading to infinite
       // instcombine.
-      if (NonConstBB == I.getParent())
+      if (isPotentiallyReachable(I.getParent(), NonConstBB, DT,
+                                 getAnalysisIfAvailable<LoopInfo>()))
         return nullptr;
     }
 
   // If there is exactly one non-constant value, we can insert a copy of the
   // operation in that block.  However, if this is a critical edge, we would be
-  // inserting the computation one some other paths (e.g. inside a loop).  Only
+  // inserting the computation on some other paths (e.g. inside a loop).  Only
   // do this if the pred block is unconditionally branching into the phi block.
   if (NonConstBB != nullptr) {
     BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
@@ -1284,7 +1319,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
 
-  if (Value *V = SimplifyGEPInst(Ops, DL))
+  if (Value *V = SimplifyGEPInst(Ops, DL, TLI, DT, AC))
     return ReplaceInstUsesWith(GEP, V);
 
   Value *PtrOp = GEP.getOperand(0);
@@ -1478,19 +1513,50 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
   }
 
-  // Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y))
-  // The GEP pattern is emitted by the SCEV expander for certain kinds of
-  // pointer arithmetic.
-  if (DL && GEP.getNumIndices() == 1 &&
-      match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) {
+  if (DL && GEP.getNumIndices() == 1) {
     unsigned AS = GEP.getPointerAddressSpace();
-    if (GEP.getType() == Builder->getInt8PtrTy(AS) &&
-        GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
+    if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
         DL->getPointerSizeInBits(AS)) {
-      Operator *Index = cast<Operator>(GEP.getOperand(1));
-      Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
-      Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
-      return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+      Type *PtrTy = GEP.getPointerOperandType();
+      Type *Ty = PtrTy->getPointerElementType();
+      uint64_t TyAllocSize = DL->getTypeAllocSize(Ty);
+
+      bool Matched = false;
+      uint64_t C;
+      Value *V = nullptr;
+      if (TyAllocSize == 1) {
+        V = GEP.getOperand(1);
+        Matched = true;
+      } else if (match(GEP.getOperand(1),
+                       m_AShr(m_Value(V), m_ConstantInt(C)))) {
+        if (TyAllocSize == 1ULL << C)
+          Matched = true;
+      } else if (match(GEP.getOperand(1),
+                       m_SDiv(m_Value(V), m_ConstantInt(C)))) {
+        if (TyAllocSize == C)
+          Matched = true;
+      }
+
+      if (Matched) {
+        // Canonicalize (gep i8* X, -(ptrtoint Y))
+        // to (inttoptr (sub (ptrtoint X), (ptrtoint Y)))
+        // The GEP pattern is emitted by the SCEV expander for certain kinds of
+        // pointer arithmetic.
+        if (match(V, m_Neg(m_PtrToInt(m_Value())))) {
+          Operator *Index = cast<Operator>(V);
+          Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
+          Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
+          return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+        }
+        // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X))
+        // to (bitcast Y)
+        Value *Y;
+        if (match(V, m_Sub(m_PtrToInt(m_Value(Y)),
+                           m_PtrToInt(m_Specific(GEP.getOperand(0)))))) {
+          return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y,
+                                                               GEP.getType());
+        }
+      }
     }
   }
 
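The rewritten GEP canonicalization above generalizes the old i8*-only pattern: any index of the form V, V ashr C, or V sdiv C whose scale matches the element type's alloc size is treated as a byte offset V. The second new fold then recognizes that offsetting X by (ptrtoint Y) - (ptrtoint X) lands exactly on Y. A C-level analogue of that fold (a hypothetical example, not the IR transform itself):

    #include <cassert>
    #include <cstdint>

    // gep i8* X, (ptrtoint Y) - (ptrtoint X) is just Y. The equivalent
    // pointer arithmetic within a single array at the C++ level:
    int main() {
      char Buf[16];
      char *X = &Buf[3], *Y = &Buf[11];
      char *Z = X + (reinterpret_cast<std::intptr_t>(Y) -
                     reinterpret_cast<std::intptr_t>(X));
      assert(Z == Y);
      return 0;
    }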
@@ -1667,6 +1733,18 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   if (!DL)
     return nullptr;
 
+  // addrspacecast between types is canonicalized as a bitcast, then an
+  // addrspacecast. To take advantage of the below bitcast + struct GEP, look
+  // through the addrspacecast.
+  if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
+    //   X = bitcast A addrspace(1)* to B addrspace(1)*
+    //   Y = addrspacecast A addrspace(1)* to B addrspace(2)*
+    //   Z = gep Y, <...constant indices...>
+    // Into an addrspacecasted GEP of the struct.
+    if (BitCastInst *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
+      PtrOp = BC;
+  }
+
   /// See if we can simplify:
   ///   X = bitcast A* to B*
   ///   Y = gep X, <...constant indices...>
@@ -1675,11 +1753,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
     Value *Operand = BCI->getOperand(0);
     PointerType *OpType = cast<PointerType>(Operand->getType());
-    unsigned OffsetBits = DL->getPointerTypeSizeInBits(OpType);
+    unsigned OffsetBits = DL->getPointerTypeSizeInBits(GEP.getType());
     APInt Offset(OffsetBits, 0);
     if (!isa<BitCastInst>(Operand) &&
-        GEP.accumulateConstantOffset(*DL, Offset) &&
-        StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
+        GEP.accumulateConstantOffset(*DL, Offset)) {
 
       // If this GEP instruction doesn't move the pointer, just replace the GEP
       // with a bitcast of the real input to the dest type.
@@ -1697,6 +1774,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           return &GEP;
         }
       }
+
+      if (Operand->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+        return new AddrSpaceCastInst(Operand, GEP.getType());
       return new BitCastInst(Operand, GEP.getType());
     }
 
@@ -1712,6 +1792,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         if (NGEP->getType() == GEP.getType())
           return ReplaceInstUsesWith(GEP, NGEP);
         NGEP->takeName(&GEP);
+
+        if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+          return new AddrSpaceCastInst(NGEP, GEP.getType());
         return new BitCastInst(NGEP, GEP.getType());
       }
     }
@@ -1922,7 +2005,25 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
   return nullptr;
 }
 
+Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
+  if (RI.getNumOperands() == 0) // ret void
+    return nullptr;
+
+  Value *ResultOp = RI.getOperand(0);
+  Type *VTy = ResultOp->getType();
+  if (!VTy->isIntegerTy())
+    return nullptr;
+
+  // There might be assume intrinsics dominating this return that completely
+  // determine the value. If so, constant fold it.
+  unsigned BitWidth = VTy->getPrimitiveSizeInBits();
+  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+  computeKnownBits(ResultOp, KnownZero, KnownOne, 0, &RI);
+  if ((KnownZero|KnownOne).isAllOnesValue())
+    RI.setOperand(0, Constant::getIntegerValue(VTy, KnownOne));
+
+  return nullptr;
+}
 
 Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
   // Change br (not X), label True, label False to: br X, label False, True
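The new visitReturnInst above relies on computeKnownBits: if the KnownZero and KnownOne masks together cover every bit (often thanks to dominating @llvm.assume calls), the returned value is fully determined and can be replaced by the constant assembled from KnownOne. A small sketch of that all-bits-known test, using plain 8-bit masks in place of APInt (the mask values are made up for illustration):

    #include <cassert>
    #include <cstdint>

    // visitReturnInst's test: if KnownZero | KnownOne covers every bit,
    // the value is fully determined and must equal KnownOne.
    int main() {
      uint8_t KnownZero = 0xF0, KnownOne = 0x0A; // bits 2 and 0 still unknown
      assert(static_cast<uint8_t>(KnownZero | KnownOne) != 0xFF);
      KnownZero = 0xF5;                          // now every bit is accounted for
      assert(static_cast<uint8_t>(KnownZero | KnownOne) == 0xFF);
      // When that holds, the only possible runtime value is KnownOne (0x0A).
      return 0;
    }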
@@ -1974,6 +2075,40 @@
 Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
   Value *Cond = SI.getCondition();
+  unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
+  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+  computeKnownBits(Cond, KnownZero, KnownOne);
+  unsigned LeadingKnownZeros = KnownZero.countLeadingOnes();
+  unsigned LeadingKnownOnes = KnownOne.countLeadingOnes();
+
+  // Compute the number of leading bits we can ignore.
+  for (auto &C : SI.cases()) {
+    LeadingKnownZeros = std::min(
+        LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
+    LeadingKnownOnes = std::min(
+        LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
+  }
+
+  unsigned NewWidth = BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes);
+
+  // Truncate the condition operand if the new type is equal to or larger than
+  // the largest legal integer type. We need to be conservative here since
+  // x86 generates redundant zero-extension instructions if the operand is
+  // truncated to i8 or i16.
+  bool TruncCond = false;
+  if (DL && BitWidth > NewWidth &&
+      NewWidth >= DL->getLargestLegalIntTypeSize()) {
+    TruncCond = true;
+    IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
+    Builder->SetInsertPoint(&SI);
+    Value *NewCond = Builder->CreateTrunc(SI.getCondition(), Ty, "trunc");
+    SI.setCondition(NewCond);
+
+    for (auto &C : SI.cases())
+      static_cast<SwitchInst::CaseIt *>(&C)->setValue(ConstantInt::get(
+          SI.getContext(), C.getCaseValue()->getValue().trunc(NewWidth)));
+  }
+
   if (Instruction *I = dyn_cast<Instruction>(Cond)) {
     if (I->getOpcode() == Instruction::Add)
       if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -1982,8 +2117,12 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
         for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
              i != e; ++i) {
           ConstantInt* CaseVal = i.getCaseValue();
-          Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal),
-                                                      AddRHS);
+          Constant *LHS = CaseVal;
+          if (TruncCond)
+            LHS = LeadingKnownZeros
+                      ? ConstantExpr::getZExt(CaseVal, Cond->getType())
+                      : ConstantExpr::getSExt(CaseVal, Cond->getType());
+          Constant* NewCaseVal = ConstantExpr::getSub(LHS, AddRHS);
           assert(isa<ConstantInt>(NewCaseVal) &&
                  "Result of expression should be constant");
           i.setValue(cast<ConstantInt>(NewCaseVal));
@@ -1993,7 +2132,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
     return &SI;
   }
-  return nullptr;
+
+  return TruncCond ? &SI : nullptr;
 }
 
 Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
@@ -2212,7 +2352,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
 
         // If we already saw this clause, there is no point in having a second
         // copy of it.
-        if (AlreadyCaught.insert(TypeInfo)) {
+        if (AlreadyCaught.insert(TypeInfo).second) {
           // This catch clause was not already seen.
           NewClauses.push_back(CatchClause);
         } else {
@@ -2294,7 +2434,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
           continue;
         // There is no point in having multiple copies of the same typeinfo in
         // a filter, so only add it if we didn't already.
-        if (SeenInFilter.insert(TypeInfo))
+        if (SeenInFilter.insert(TypeInfo).second)
           NewFilterElts.push_back(cast<Constant>(Elt));
       }
       // A filter containing a catch-all cannot match anything by definition.
@@ -2531,7 +2671,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
 /// whose condition is a known constant, we only visit the reachable successors.
 ///
 static bool AddReachableCodeToWorklist(BasicBlock *BB,
-                                       SmallPtrSet<BasicBlock*, 64> &Visited,
+                                       SmallPtrSetImpl<BasicBlock*> &Visited,
                                        InstCombiner &IC,
                                        const DataLayout *DL,
                                        const TargetLibraryInfo *TLI) {
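The visitSwitchInst hunks above shrink the switch condition when its leading bits are known: the number of ignorable leading bits is the minimum of what computeKnownBits proves about the condition and what the case constants themselves exhibit, and the truncation only fires when the resulting width is still at least DL->getLargestLegalIntTypeSize() (hence the comment about redundant zero-extensions on x86). A sketch of the width computation with compiler builtins standing in for APInt (the concrete numbers are made up):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    // How visitSwitchInst picks a narrower width: known leading zeros of the
    // condition, further limited by the leading zeros of every case value.
    int main() {
      unsigned BitWidth = 64;
      unsigned LeadingKnownZeros = 56; // e.g. computeKnownBits proved cond < 256
      unsigned LeadingKnownOnes = 0;
      for (uint64_t CaseVal : {1ULL, 2ULL, 7ULL})
        LeadingKnownZeros = std::min(
            LeadingKnownZeros,
            CaseVal ? (unsigned)__builtin_clzll(CaseVal) : 64u);
      unsigned NewWidth = BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes);
      std::cout << "candidate switch width: i" << NewWidth << "\n"; // i8 here
      return 0;
    }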
@@ -2546,7 +2686,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
     BB = Worklist.pop_back_val();
 
     // We have now visited this block!  If we've already been here, ignore it.
-    if (!Visited.insert(BB)) continue;
+    if (!Visited.insert(BB).second)
+      continue;
 
     for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
       Instruction *Inst = BBI++;
@@ -2807,13 +2948,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
 }
 
 namespace {
-class InstCombinerLibCallSimplifier : public LibCallSimplifier {
+class InstCombinerLibCallSimplifier final : public LibCallSimplifier {
   InstCombiner *IC;
 public:
   InstCombinerLibCallSimplifier(const DataLayout *DL,
                                 const TargetLibraryInfo *TLI,
                                 InstCombiner *IC)
-    : LibCallSimplifier(DL, TLI, UnsafeFPShrink) {
+    : LibCallSimplifier(DL, TLI) {
     this->IC = IC;
   }
 
@@ -2829,18 +2970,20 @@ bool InstCombiner::runOnFunction(Function &F) {
   if (skipOptnoneFunction(F))
     return false;
 
+  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
   DL = DLP ? &DLP->getDataLayout() : nullptr;
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   TLI = &getAnalysis<TargetLibraryInfo>();
+
   // Minimizing size?
   MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                                 Attribute::MinSize);
 
   /// Builder - This is an IRBuilder that automatically inserts new
   /// instructions into the worklist when they are created.
-  IRBuilder<true, TargetFolder, InstCombineIRInserter>
-    TheBuilder(F.getContext(), TargetFolder(DL),
-               InstCombineIRInserter(Worklist));
+  IRBuilder<true, TargetFolder, InstCombineIRInserter> TheBuilder(
+      F.getContext(), TargetFolder(DL), InstCombineIRInserter(Worklist, AC));
   Builder = &TheBuilder;
 
   InstCombinerLibCallSimplifier TheSimplifier(DL, TLI, this);
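Several hunks above (AlreadyCaught, SeenInFilter, Visited) follow the same LLVM API change: SmallPtrSet::insert now returns an (iterator, inserted) pair instead of a bool, so callers test .second. The behaviour mirrors std::set, as this standalone sketch shows (std::set stands in for SmallPtrSet here):

    #include <cassert>
    #include <set>

    // The mechanical change behind the .second hunks: insert reports whether
    // the element was newly added via the pair's second member.
    int main() {
      std::set<int> Visited;
      assert(Visited.insert(42).second);   // first insertion succeeds
      assert(!Visited.insert(42).second);  // duplicate reports "not inserted"
      return 0;
    }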