Diffstat (limited to 'contrib/llvm/lib/Transforms')
37 files changed, 2127 insertions, 659 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
index a509931..d3d4963 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -83,7 +83,8 @@ bool DTE::runOnModule(Module &M) {
   bool Changed = false;
 
   TypeSymbolTable &ST = M.getTypeSymbolTable();
-  std::set<const Type *> UsedTypes = getAnalysis<FindUsedTypes>().getTypes();
+  const SetVector<const Type*> &T = getAnalysis<FindUsedTypes>().getTypes();
+  std::set<const Type*> UsedTypes(T.begin(), T.end());
 
   // Check the symbol table for superfluous type entries...
   //
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index 9d432de..d9911bf 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -51,20 +51,32 @@ namespace {
       // Visit the GlobalVariables.
       for (Module::global_iterator I = M.global_begin(), E = M.global_end();
            I != E; ++I) {
+        if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
+          I->setInitializer(0);
+        } else {
+          if (I->hasAvailableExternallyLinkage())
+            continue;
+          if (I->getName() == "llvm.global_ctors")
+            continue;
+        }
+
         if (I->hasLocalLinkage())
           I->setVisibility(GlobalValue::HiddenVisibility);
         I->setLinkage(GlobalValue::ExternalLinkage);
-        if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
-          I->setInitializer(0);
       }
 
       // Visit the Functions.
       for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+        if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
+          I->deleteBody();
+        } else {
+          if (I->hasAvailableExternallyLinkage())
+            continue;
+        }
+
        if (I->hasLocalLinkage())
          I->setVisibility(GlobalValue::HiddenVisibility);
        I->setLinkage(GlobalValue::ExternalLinkage);
-        if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
-          I->deleteBody();
      }
 
      return true;
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index ded58ac..cdf7b76 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -241,15 +241,15 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
       GS.HasPHIUser = true;
     } else if (isa<CmpInst>(I)) {
       GS.isCompared = true;
-    } else if (isa<MemTransferInst>(I)) {
-      const MemTransferInst *MTI = cast<MemTransferInst>(I);
+    } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+      if (MTI->isVolatile()) return true;
       if (MTI->getArgOperand(0) == V)
         GS.StoredType = GlobalStatus::isStored;
       if (MTI->getArgOperand(1) == V)
         GS.isLoaded = true;
-    } else if (isa<MemSetInst>(I)) {
-      assert(cast<MemSetInst>(I)->getArgOperand(0) == V &&
-             "Memset only takes one pointer!");
+    } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+      assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
+      if (MSI->isVolatile()) return true;
       GS.StoredType = GlobalStatus::isStored;
     } else {
       return true;  // Any other non-load instruction might take address!
@@ -799,7 +799,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
       // If we get here we could have other crazy uses that are transitively
       // loaded.
       assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
-              isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!");
+              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) &&
+             "Only expect load and stores!");
     }
   }
 
@@ -1589,8 +1590,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
       GV->getInitializer()->isNullValue()) {
     if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
       if (GV->getInitializer()->getType() != SOVC->getType())
-        SOVC =
-         ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
 
       // Optimize away any trapping uses of the loaded value.
       if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
@@ -2438,6 +2438,20 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
       // Cannot handle inline asm.
       if (isa<InlineAsm>(CI->getCalledValue())) return false;
 
+      if (MemSetInst *MSI = dyn_cast<MemSetInst>(CI)) {
+        if (MSI->isVolatile()) return false;
+        Constant *Ptr = getVal(Values, MSI->getDest());
+        Constant *Val = getVal(Values, MSI->getValue());
+        Constant *DestVal = ComputeLoadResult(getVal(Values, Ptr),
+                                              MutatedMemory);
+        if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
+          // This memset is a no-op.
+          ++CurInst;
+          continue;
+        }
+        return false;
+      }
+
       // Resolve function pointers.
       Function *Callee = dyn_cast<Function>(getVal(Values,
                                                    CI->getCalledValue()));
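The MemSetInst case added to EvaluateFunction above only lets the static evaluator step over a memset that provably changes nothing: the call is non-volatile, it stores a zero value, and the destination already evaluates to zero. A minimal stand-alone sketch of that guard, with a hypothetical helper name and the two constants passed in the way the pass derives them via getVal()/ComputeLoadResult():

    #include "llvm/IntrinsicInst.h"
    using namespace llvm;

    // Sketch only: a memset may be skipped by the evaluator when it is
    // non-volatile and writes zero over memory whose simulated contents
    // are already zero; anything else aborts evaluation.
    static bool isNoOpMemSet(const MemSetInst *MSI, const Constant *Val,
                             const Constant *DestVal) {
      if (MSI->isVolatile())
        return false;                           // volatile stores must stay
      return Val->isNullValue() &&              // stores 0x00 bytes ...
             DestVal && DestVal->isNullValue(); // ... over already-zero memory
    }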
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index 9470180..2f3baeb 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -180,6 +180,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
         Call->takeName(II);
         Call->setCallingConv(II->getCallingConv());
         Call->setAttributes(II->getAttributes());
+        Call->setDebugLoc(II->getDebugLoc());
 
         // Anything that used the value produced by the invoke instruction
         // now uses the value produced by the call instruction.  Note that we
@@ -238,7 +239,7 @@ void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
   for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
     --I;
     if (CallInst *CI = dyn_cast<CallInst>(I)) {
-      if (!isa<DbgInfoIntrinsic>(I))
+      if (!isa<IntrinsicInst>(I))
         CGN->removeCallEdgeFor(CI);
     } else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
       CGN->removeCallEdgeFor(II);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
index 9c70cf8..8257d6b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -233,7 +233,15 @@ public:
     Worklist.Add(New);
     return New;
   }
-
+
+  // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
+  // debug loc.
+  //
+  Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
+    New->setDebugLoc(Old.getDebugLoc());
+    return InsertNewInstBefore(New, Old);
+  }
+
   // ReplaceInstUsesWith - This method is to be used when an instruction is
   // found to be dead, replaceable with another preexisting expression.  Here
   // we add all uses of I to the worklist, replace all uses of I with the new
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 726105f..ef67701 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -111,10 +111,10 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
   Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
-  Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign);
-  InsertNewInstBefore(L, *MI);
-  InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign),
-                      *MI);
+  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
+  L->setAlignment(SrcAlign);
+  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
+  S->setAlignment(DstAlign);
 
   // Set the size of the copy to 0, it will be deleted on the next iteration.
   MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
@@ -154,8 +154,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
 
     // Extract the fill value and store.
     uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
-    InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
-                                      Dest, false, Alignment), *MI);
+    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
+                                        MI->isVolatile());
+    S->setAlignment(Alignment);
 
     // Set the size of the copy to 0, it will be deleted on the next iteration.
     MI->setLength(Constant::getNullValue(LenC->getType()));
@@ -405,20 +406,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       if (LHSKnownNegative && RHSKnownNegative) {
         // The sign bit is set in both cases: this MUST overflow.
         // Create a simple add instruction, and insert it into the struct.
-        Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
-        Worklist.Add(Add);
+        Value *Add = Builder->CreateAdd(LHS, RHS);
+        Add->takeName(&CI);
         Constant *V[] = {
-          UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext())
+          UndefValue::get(LHS->getType()),
+          ConstantInt::getTrue(II->getContext())
         };
         Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
         return InsertValueInst::Create(Struct, Add, 0);
       }
-
+
       if (LHSKnownPositive && RHSKnownPositive) {
         // The sign bit is clear in both cases: this CANNOT overflow.
         // Create a simple add instruction, and insert it into the struct.
-        Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
-        Worklist.Add(Add);
+        Value *Add = Builder->CreateNUWAdd(LHS, RHS);
+        Add->takeName(&CI);
         Constant *V[] = {
           UndefValue::get(LHS->getType()),
           ConstantInt::getFalse(II->getContext())
@@ -588,6 +590,28 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
+
+  case Intrinsic::x86_sse41_pmovsxbw:
+  case Intrinsic::x86_sse41_pmovsxwd:
+  case Intrinsic::x86_sse41_pmovsxdq:
+  case Intrinsic::x86_sse41_pmovzxbw:
+  case Intrinsic::x86_sse41_pmovzxwd:
+  case Intrinsic::x86_sse41_pmovzxdq: {
+    // pmov{s|z}x ignores the upper half of their input vectors.
+    unsigned VWidth =
+      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
+    unsigned LowHalfElts = VWidth / 2;
+    APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
+    APInt UndefElts(VWidth, 0);
+    if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
+                                                 InputDemandedElts,
+                                                 UndefElts)) {
+      II->setArgOperand(0, TmpV);
+      return II;
+    }
+    break;
+  }
+
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
@@ -813,7 +837,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
     // If OldCall does not return void then replaceAllUsesWith undef.
     // This allows ValueHandlers and custom metadata to adjust itself.
     if (!OldCall->getType()->isVoidTy())
-      OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
+      ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
     if (isa<CallInst>(OldCall))
       return EraseInstFromFunction(*OldCall);
@@ -835,8 +859,8 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
     // If CS does not return void then replaceAllUsesWith undef.
     // This allows ValueHandlers and custom metadata to adjust itself.
     if (!CS.getInstruction()->getType()->isVoidTy())
-      CS.getInstruction()->
-        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
+      ReplaceInstUsesWith(*CS.getInstruction(),
+                          UndefValue::get(CS.getInstruction()->getType()));
 
     if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
       // Don't break the CFG, insert a dummy cond branch.
@@ -1084,15 +1108,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 
   Instruction *NC;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
-    NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
-                            Args.begin(), Args.end(),
-                            Caller->getName(), Caller);
+    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
+                               II->getUnwindDest(), Args.begin(), Args.end());
+    NC->takeName(II);
     cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
     cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
   } else {
-    NC = CallInst::Create(Callee, Args.begin(), Args.end(),
-                          Caller->getName(), Caller);
     CallInst *CI = cast<CallInst>(Caller);
+    NC = Builder->CreateCall(Callee, Args.begin(), Args.end());
+    NC->takeName(CI);
     if (CI->isTailCall())
       cast<CallInst>(NC)->setTailCall();
     cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
@@ -1106,6 +1130,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
       Instruction::CastOps opcode =
         CastInst::getCastOpcode(NC, false, OldRetTy, false);
       NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
+      NC->setDebugLoc(Caller->getDebugLoc());
 
       // If this is an invoke instruction, we should insert it after the first
       // non-phi, instruction in the normal successor block.
@@ -1123,8 +1148,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   }
 
   if (!Caller->use_empty())
-    Caller->replaceAllUsesWith(NV);
-
+    ReplaceInstUsesWith(*Caller, NV);
+
   EraseInstFromFunction(*Caller);
   return true;
 }
@@ -1189,7 +1214,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
         // Add the chain argument and attributes.
         Value *NestVal = Tramp->getArgOperand(2);
         if (NestVal->getType() != NestTy)
-          NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
+          NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
         NewArgs.push_back(NestVal);
         NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
       }
@@ -1255,24 +1280,19 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
         NewCaller = InvokeInst::Create(NewCallee,
                                        II->getNormalDest(), II->getUnwindDest(),
-                                       NewArgs.begin(), NewArgs.end(),
-                                       Caller->getName(), Caller);
+                                       NewArgs.begin(), NewArgs.end());
         cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
         cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
       } else {
-        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
-                                     Caller->getName(), Caller);
+        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end());
         if (cast<CallInst>(Caller)->isTailCall())
           cast<CallInst>(NewCaller)->setTailCall();
         cast<CallInst>(NewCaller)->
           setCallingConv(cast<CallInst>(Caller)->getCallingConv());
         cast<CallInst>(NewCaller)->setAttributes(NewPAL);
       }
-      if (!Caller->getType()->isVoidTy())
-        Caller->replaceAllUsesWith(NewCaller);
-      Caller->eraseFromParent();
-      Worklist.Remove(Caller);
-      return 0;
+
+      return NewCaller;
     }
   }
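The pmovsx/pmovzx handling above relies on those instructions reading only the low half of their input vector, so the demanded-elements analysis may declare the high half dead. A small illustration of the mask arithmetic from that hunk, using nothing but APInt:

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // For pmovsxbw the input is <16 x i8> yet only elements [0,8) are read.
    // This mirrors the InputDemandedElts computation in the hunk above:
    // a mask with bits [0, VWidth/2) set and the upper half clear.
    APInt lowHalfDemandedElts(unsigned VWidth) {
      return APInt::getBitsSet(VWidth, 0, VWidth / 2);
    }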
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 6f70de8..199902a 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -133,7 +133,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
     // New is the allocation instruction, pointer typed. AI is the original
     // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
     Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
-    AI.replaceAllUsesWith(NewCast);
+    ReplaceInstUsesWith(AI, NewCast);
   }
   return ReplaceInstUsesWith(CI, New);
 }
@@ -211,7 +211,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
   }
 
   Res->takeName(I);
-  return InsertNewInstBefore(Res, *I);
+  return InsertNewInstWith(Res, *I);
 }
 
 
@@ -1228,7 +1228,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
 
       // Remove the old Call.  With -fmath-errno, it won't get marked readnone.
-      Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+      ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
       EraseInstFromFunction(*Call);
       return ret;
     }
@@ -1684,8 +1684,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
       // If we found a path from the src to dest, create the getelementptr now.
       if (SrcElTy == DstElTy) {
         SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-        return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(),"",
-                                                 ((Instruction*)NULL));
+        return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end());
       }
     }
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bb9b88b..c7ed098 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -469,8 +469,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
 ///
 /// If we can't emit an optimized form for this expression, this returns null.
 ///
-static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
-                                          InstCombiner &IC) {
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
   TargetData &TD = *IC.getTargetData();
   gep_type_iterator GTI = gep_type_begin(GEP);
@@ -533,10 +532,10 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
     // Cast to intptrty in case a truncation occurs.  If an extension is needed,
     // we don't need to bother extending: the extension won't affect where the
     // computation crosses zero.
-    if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
-      VariableIdx = new TruncInst(VariableIdx,
-                                  TD.getIntPtrType(VariableIdx->getContext()),
-                                  VariableIdx->getName(), &I);
+    if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
+      const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+      VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
+    }
     return VariableIdx;
   }
@@ -558,11 +557,10 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
   // Okay, we can do this evaluation.  Start by converting the index to intptr.
   const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
   if (VariableIdx->getType() != IntPtrTy)
-    VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
-                                              true /*SExt*/,
-                                              VariableIdx->getName(), &I);
+    VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
+                                            true /*Signed*/);
   Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
-  return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
+  return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset");
 }
 
 /// FoldGEPICmp - Fold comparisons between a GEP instruction and something
@@ -580,7 +578,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
     // This transformation (ignoring the base and scales) is valid because we
     // know pointers can't overflow since the gep is inbounds.  See if we can
     // output an optimized form.
-    Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+    Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
 
     // If not, synthesize the offset the hard way.
     if (Offset == 0)
@@ -634,6 +632,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
     if (AllZeros)
       return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
 
+    bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
     if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
       // If the GEPs only differ by one index, compare it.
       unsigned NumDifferences = 0;  // Keep track of # differences.
@@ -656,7 +655,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
                              ConstantInt::get(Type::getInt1Ty(I.getContext()),
                                               ICmpInst::isTrueWhenEqual(Cond)));
 
-      else if (NumDifferences == 1) {
+      else if (NumDifferences == 1 && GEPsInBounds) {
         Value *LHSV = GEPLHS->getOperand(DiffOperand);
         Value *RHSV = GEPRHS->getOperand(DiffOperand);
         // Make sure we do a signed comparison here.
@@ -667,6 +666,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
     // Only lower this if the icmp is the only user of the GEP or if we expect
     // the result to fold to a constant!
     if (TD &&
+        GEPsInBounds &&
         (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
         (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
       // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
@@ -919,11 +919,11 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
   if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
     return 0;
 
-  // Otherwise, all lshr and all exact ashr's are equivalent to a udiv/sdiv by
-  // a power of 2.  Since we already have logic to simplify these, transform
-  // to div and then simplify the resultant comparison.
+  // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
+  // by a power of 2.  Since we already have logic to simplify these,
+  // transform to div and then simplify the resultant comparison.
   if (Shr->getOpcode() == Instruction::AShr &&
-      !Shr->isExact())
+      (!Shr->isExact() || ShAmtVal == TypeBits - 1))
     return 0;
 
   // Revisit the shift (to delete it).
@@ -2400,7 +2400,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       // fall-through
     case Instruction::SDiv:
     case Instruction::AShr:
-      if (!BO0->isExact() && !BO1->isExact())
+      if (!BO0->isExact() || !BO1->isExact())
         break;
       return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
                           BO1->getOperand(0));
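Both new GEPsInBounds guards exist because pointer arithmetic that is not inbounds may wrap: after a wrap, comparing the one differing index (or the computed symbolic offsets) can disagree with comparing the pointers themselves. A conceptual counterexample in plain C++, not LLVM API, under the simplification that indices are unsigned byte offsets:

    #include <cstdint>

    // With wrapping arithmetic, p + i can wrap past the top of the address
    // space, so (p + i < p + j) need not agree with (i < j). The inbounds
    // flag rules the wrap out, which is what licenses the index-only compare.
    bool indexCompareMatchesPtrCompare(uintptr_t p, uintptr_t i, uintptr_t j) {
      return ((p + i) < (p + j)) == (i < j); // false whenever one side wraps
    }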
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 432adc9..f499290 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -57,12 +57,14 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
         Value *Idx[2];
         Idx[0] = NullIdx;
         Idx[1] = NullIdx;
-        Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
-                                                     New->getName()+".sub", It);
+        Instruction *GEP =
+             GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
+                                               New->getName()+".sub");
+        InsertNewInstBefore(GEP, *It);
 
         // Now make everything use the getelementptr instead of the original
         // allocation.
-        return ReplaceInstUsesWith(AI, V);
+        return ReplaceInstUsesWith(AI, GEP);
       } else if (isa<UndefValue>(AI.getArraySize())) {
         return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
       }
@@ -600,10 +602,12 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   // Advance to a place where it is safe to insert the new store and
   // insert it.
   BBI = DestBB->getFirstNonPHI();
-  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
-                                    OtherStore->isVolatile(),
-                                    SI.getAlignment()), *BBI);
-
+  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
+                                   OtherStore->isVolatile(),
+                                   SI.getAlignment());
+  InsertNewInstBefore(NewSI, *BBI);
+  NewSI->setDebugLoc(OtherStore->getDebugLoc());
+
   // Nuke the old stores.
   EraseInstFromFunction(SI);
   EraseInstFromFunction(*OtherStore);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 57fb08a..2d29403 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -19,6 +19,60 @@
 using namespace llvm;
 using namespace PatternMatch;
 
+
+/// simplifyValueKnownNonZero - The specific integer value is used in a context
+/// where it is known to be non-zero.  If this allows us to simplify the
+/// computation, do so and return the new operand, otherwise return null.
+static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
+  // If V has multiple uses, then we would have to do more analysis to
+  // determine if this is safe.  For example, the use could be in dynamically
+  // unreached code.
+  if (!V->hasOneUse()) return 0;
+
+  bool MadeChange = false;
+
+  // ((1 << A) >>u B) --> (1 << (A-B))
+  // Because V cannot be zero, we know that B is less than A.
+  Value *A = 0, *B = 0, *PowerOf2 = 0;
+  if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
+                      m_Value(B))) &&
+      // The "1" can be any value known to be a power of 2.
+      isPowerOfTwo(PowerOf2, IC.getTargetData())) {
+    A = IC.Builder->CreateSub(A, B, "tmp");
+    return IC.Builder->CreateShl(PowerOf2, A);
+  }
+
+  // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
+  // inexact.  Similarly for <<.
+  if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
+    if (I->isLogicalShift() &&
+        isPowerOfTwo(I->getOperand(0), IC.getTargetData())) {
+      // We know that this is an exact/nuw shift and that the input is a
+      // non-zero context as well.
+      if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) {
+        I->setOperand(0, V2);
+        MadeChange = true;
+      }
+
+      if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
+        I->setIsExact();
+        MadeChange = true;
+      }
+
+      if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
+        I->setHasNoUnsignedWrap();
+        MadeChange = true;
+      }
+    }
+
+  // TODO: Lots more we could do here:
+  //    If V is a phi node, we can call this on each of its operands.
+  //    "select cond, X, 0" can simplify to "X".
+
+  return MadeChange ? V : 0;
+}
+
+
 /// MultiplyOverflows - True if the multiply can not be expressed in an int
 /// this size.
 static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
@@ -81,6 +135,29 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
         return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
       }
     }
+
+    // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n
+    // (Y + const) * (-(2**n)) -> (-const - Y) * (2**n), for positive nonzero n
+    // The "* (2**n)" thus becomes a potential shifting opportunity.
+    {
+      const APInt &   Val = CI->getValue();
+      const APInt &PosVal = Val.abs();
+      if (Val.isNegative() && PosVal.isPowerOf2()) {
+        Value *X = 0, *Y = 0;
+        if (Op0->hasOneUse()) {
+          ConstantInt *C1;
+          Value *Sub = 0;
+          if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
+            Sub = Builder->CreateSub(X, Y, "suba");
+          else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
+            Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc");
+          if (Sub)
+            return
+              BinaryOperator::CreateMul(Sub,
+                                        ConstantInt::get(Y->getType(), PosVal));
+        }
+      }
+    }
 
   // Simplify mul instructions with a constant RHS.
@@ -293,6 +370,12 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
 Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
+  // The RHS is known non-zero.
+  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+    I.setOperand(1, V);
+    return &I;
+  }
+
   // Handle cases involving: [su]div X, (select Cond, Y, Z)
   // This does not apply for fdiv.
   if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
@@ -499,11 +582,17 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
 Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
+  // The RHS is known non-zero.
+  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+    I.setOperand(1, V);
+    return &I;
+  }
+
   // Handle cases involving: rem X, (select Cond, Y, Z)
   if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
     return &I;
 
-  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+  if (isa<ConstantInt>(Op1)) {
     if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
       if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
         if (Instruction *R = FoldOpIntoSelect(I, SI))
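The ((1 << A) >>u B) --> (1 << (A-B)) rewrite above leans entirely on the known-non-zero premise: if the shifted value cannot be zero, no set bit was shifted out, which forces B <= A, and then the two forms agree. A quick exhaustive check of that identity in plain C++:

    #include <cassert>
    #include <cstdint>

    // Whenever (1 << a) >> b is non-zero, no bits were lost, so b <= a and
    // the value equals 1 << (a - b), which is the rewritten form.
    int main() {
      for (uint32_t a = 0; a < 32; ++a)
        for (uint32_t b = 0; b < 32; ++b) {
          uint32_t v = (1u << a) >> b;
          if (v != 0)                      // the "known non-zero" premise
            assert(v == (1u << (a - b)));  // the rewritten form
        }
      return 0;
    }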
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index abf61bb..3777340 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -110,16 +110,20 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
     }
   }
 
-  if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
-    return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
-                           LHSVal, RHSVal);
-
+  if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
+    CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+                                     LHSVal, RHSVal);
+    NewCI->setDebugLoc(FirstInst->getDebugLoc());
+    return NewCI;
+  }
+
   BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst);
   BinaryOperator *NewBinOp =
     BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
   if (isNUW) NewBinOp->setHasNoUnsignedWrap();
   if (isNSW) NewBinOp->setHasNoSignedWrap();
   if (isExact) NewBinOp->setIsExact();
+  NewBinOp->setDebugLoc(FirstInst->getDebugLoc());
   return NewBinOp;
 }
 
@@ -228,6 +232,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
     GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
                               FixedOperands.end());
   if (AllInBounds) NewGEP->setIsInBounds();
+  NewGEP->setDebugLoc(FirstInst->getDebugLoc());
   return NewGEP;
 }
 
@@ -369,7 +374,9 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
     for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
       cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
 
-  return new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
+  LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
+  NewLI->setDebugLoc(FirstLI->getDebugLoc());
+  return NewLI;
 }
 
 
@@ -469,20 +476,27 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
   }
 
   // Insert and return the new operation.
-  if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
-    return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
+  if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) {
+    CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal,
+                                       PN.getType());
+    NewCI->setDebugLoc(FirstInst->getDebugLoc());
+    return NewCI;
+  }
 
   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
     BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
     if (isNUW) BinOp->setHasNoUnsignedWrap();
     if (isNSW) BinOp->setHasNoSignedWrap();
     if (isExact) BinOp->setIsExact();
+    BinOp->setDebugLoc(FirstInst->getDebugLoc());
     return BinOp;
   }
 
   CmpInst *CIOp = cast<CmpInst>(FirstInst);
-  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
-                         PhiVal, ConstantOp);
+  CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+                                   PhiVal, ConstantOp);
+  NewCI->setDebugLoc(FirstInst->getDebugLoc());
+  return NewCI;
 }
 
 /// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 61a433a..aeb3c3e 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -133,9 +133,8 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
     }
 
     // Fold this by inserting a select from the input values.
-    SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
-                                           FI->getOperand(0), SI.getName()+".v");
-    InsertNewInstBefore(NewSI, SI);
+    Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
+                                         FI->getOperand(0), SI.getName()+".v");
     return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
                             TI->getType());
   }
@@ -174,9 +173,8 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
   }
 
   // If we reach here, they do have operations in common.
-  SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
-                                         OtherOpF, SI.getName()+".v");
-  InsertNewInstBefore(NewSI, SI);
+  Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT,
+                                       OtherOpF, SI.getName()+".v");
 
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
     if (MatchIsOpZero)
@@ -224,8 +222,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
         // Avoid creating select between 2 constants unless it's selecting
         // between 0, 1 and -1.
         if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
-          Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
-          InsertNewInstBefore(NewSel, SI);
+          Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C);
           NewSel->takeName(TVI);
           BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
           BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
@@ -260,8 +257,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
         // Avoid creating select between 2 constants unless it's selecting
         // between 0, 1 and -1.
         if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
-          Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
-          InsertNewInstBefore(NewSel, SI);
+          Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp);
           NewSel->takeName(FVI);
           BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
           BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
@@ -282,6 +278,59 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
   return 0;
 }
 
+/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
+/// replaced with RepOp.
+static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+                                     const TargetData *TD) {
+  // Trivial replacement.
+  if (V == Op)
+    return RepOp;
+
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return 0;
+
+  // If this is a binary operator, try to simplify it with the replaced op.
+  if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
+    if (B->getOperand(0) == Op)
+      return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD);
+    if (B->getOperand(1) == Op)
+      return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD);
+  }
+
+  // Same for CmpInsts.
+  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+    if (C->getOperand(0) == Op)
+      return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD);
+    if (C->getOperand(1) == Op)
+      return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD);
+  }
+
+  // TODO: We could hand off more cases to instsimplify here.
+
+  // If all operands are constant after substituting Op for RepOp then we can
+  // constant fold the instruction.
+  if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) {
+    // Build a list of all constant operands.
+    SmallVector<Constant*, 8> ConstOps;
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      if (I->getOperand(i) == Op)
+        ConstOps.push_back(CRepOp);
+      else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i)))
+        ConstOps.push_back(COp);
+      else
+        break;
+    }
+
+    // All operands were constants, fold it.
+    if (ConstOps.size() == I->getNumOperands())
+      return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+                                      ConstOps.data(), ConstOps.size(), TD);
+  }
+
+  return 0;
+}
+
 /// visitSelectInstWithICmp - Visit a SelectInst that has an
 /// ICmpInst as its first operand.
 ///
@@ -420,25 +469,21 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
     }
   }
 
-  if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
-    // Transform (X == Y) ? X : Y  -> Y
-    if (Pred == ICmpInst::ICMP_EQ)
+  // If we have an equality comparison then we know the value in one of the
+  // arms of the select. See if substituting this value into the arm and
+  // simplifying the result yields the same value as the other arm.
+  if (Pred == ICmpInst::ICMP_EQ) {
+    if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
+        SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
       return ReplaceInstUsesWith(SI, FalseVal);
-    // Transform (X != Y) ? X : Y  -> X
-    if (Pred == ICmpInst::ICMP_NE)
+  } else if (Pred == ICmpInst::ICMP_NE) {
+    if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
+        SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
       return ReplaceInstUsesWith(SI, TrueVal);
-    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
-
-  } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
-    // Transform (X == Y) ? Y : X  -> X
-    if (Pred == ICmpInst::ICMP_EQ)
-      return ReplaceInstUsesWith(SI, FalseVal);
-    // Transform (X != Y) ? Y : X  -> Y
-    if (Pred == ICmpInst::ICMP_NE)
-      return ReplaceInstUsesWith(SI, TrueVal);
-    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
   }
 
+  // NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
   if (isa<Constant>(CmpRHS)) {
     if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
       // Transform (X == C) ? X : Y -> (X == C) ? C : Y
@@ -604,9 +649,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
           return BinaryOperator::CreateOr(CondVal, FalseVal);
         }
         // Change: A = select B, false, C --> A = and !B, C
-        Value *NotCond =
-          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
-                                             "not."+CondVal->getName()), SI);
+        Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
         return BinaryOperator::CreateAnd(NotCond, FalseVal);
       } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
         if (C->getZExtValue() == false) {
           // Change: A = select B, C, false --> A = and B, C
           return BinaryOperator::CreateAnd(CondVal, TrueVal);
         }
         // Change: A = select B, C, true --> A = or !B, C
-        Value *NotCond =
-          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
-                                             "not."+CondVal->getName()), SI);
+        Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
         return BinaryOperator::CreateOr(NotCond, TrueVal);
       }
@@ -755,27 +796,20 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
         // So at this point we know we have (Y -> OtherAddOp):
         //        select C, (add X, Y), (sub X, Z)
         Value *NegVal;  // Compute -Z
-        if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
-          NegVal = ConstantExpr::getNeg(C);
-        } else if (SI.getType()->isFloatingPointTy()) {
-          NegVal = InsertNewInstBefore(
-                    BinaryOperator::CreateFNeg(SubOp->getOperand(1),
-                                               "tmp"), SI);
+        if (SI.getType()->isFloatingPointTy()) {
+          NegVal = Builder->CreateFNeg(SubOp->getOperand(1));
         } else {
-          NegVal = InsertNewInstBefore(
-                    BinaryOperator::CreateNeg(SubOp->getOperand(1),
-                                              "tmp"), SI);
+          NegVal = Builder->CreateNeg(SubOp->getOperand(1));
         }
 
         Value *NewTrueOp = OtherAddOp;
         Value *NewFalseOp = NegVal;
         if (AddOp != TI)
           std::swap(NewTrueOp, NewFalseOp);
-        Instruction *NewSel =
-          SelectInst::Create(CondVal, NewTrueOp,
-                             NewFalseOp, SI.getName() + ".p");
+        Value *NewSel =
+          Builder->CreateSelect(CondVal, NewTrueOp,
+                                NewFalseOp, SI.getName() + ".p");
 
-        NewSel = InsertNewInstBefore(NewSel, SI);
         if (SI.getType()->isFloatingPointTy())
           return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
         else
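SimplifyWithOpReplaced generalizes the literal (X == Y) ? X : Y folds that it replaces: under an equality guard, the compared value may be substituted into one arm, and if that arm then simplifies to the other arm the whole select collapses. For instance (x == 0) ? y : (x | y) is always x | y, because substituting x = 0 into the false arm yields y, the true arm. A plain C++ check of that one instance:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 16; ++x)
        for (uint32_t y = 0; y < 16; ++y) {
          uint32_t sel = (x == 0) ? y : (x | y); // the select being folded
          assert(sel == (x | y));                // the folded form
        }
      return 0;
    }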
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 6e727ce..8fea8eb 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -313,7 +313,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         Instruction *Or =
           BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                    I->getName());
-        return InsertNewInstBefore(Or, *I);
+        return InsertNewInstWith(Or, *I);
       }
 
       // If all of the demanded bits on one side are known, and all of the set
@@ -327,7 +327,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
                                        ~RHSKnownOne & DemandedMask);
         Instruction *And =
           BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
-        return InsertNewInstBefore(And, *I);
+        return InsertNewInstWith(And, *I);
       }
     }
@@ -353,13 +353,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
           ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
         Instruction *NewAnd =
           BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
-        InsertNewInstBefore(NewAnd, *I);
+        InsertNewInstWith(NewAnd, *I);
 
         Constant *XorC =
           ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
         Instruction *NewXor =
           BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
-        return InsertNewInstBefore(NewXor, *I);
+        return InsertNewInstWith(NewXor, *I);
       }
 
       // Output known-0 bits are known if clear or set in both the LHS & RHS.
@@ -472,7 +472,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
         // Convert to ZExt cast
         CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
-        return InsertNewInstBefore(NewCast, *I);
+        return InsertNewInstWith(NewCast, *I);
       } else if (KnownOne[SrcBitWidth-1]) {    // Input sign bit known set
         KnownOne |= NewBits;
       }
@@ -515,7 +515,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         Instruction *Or =
           BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                    I->getName());
-        return InsertNewInstBefore(Or, *I);
+        return InsertNewInstWith(Or, *I);
       }
 
       // We can say something about the output known-zero and known-one bits,
@@ -632,7 +632,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         // Perform the logical shift right.
         Instruction *NewVal = BinaryOperator::CreateLShr(
                         I->getOperand(0), I->getOperand(1), I->getName());
-        return InsertNewInstBefore(NewVal, *I);
+        return InsertNewInstWith(NewVal, *I);
       }
 
       // If the sign bit is the only bit demanded by this ashr, then there is no
@@ -676,7 +676,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         // Perform the logical shift right.
         Instruction *NewVal = BinaryOperator::CreateLShr(
                           I->getOperand(0), SA, I->getName());
-        return InsertNewInstBefore(NewVal, *I);
+        return InsertNewInstWith(NewVal, *I);
       } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
         KnownOne |= HighBits;
       }
@@ -774,12 +774,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
                 ConstantInt::get(I->getType(), ResultBit-InputBit));
         NewVal->takeName(I);
-        return InsertNewInstBefore(NewVal, *I);
+        return InsertNewInstWith(NewVal, *I);
       }
 
       // TODO: Could compute known zero/one bits based on the input.
       break;
     }
+    case Intrinsic::x86_sse42_crc32_64_8:
+    case Intrinsic::x86_sse42_crc32_64_64:
+      KnownZero = APInt::getHighBitsSet(64, 32);
+      return 0;
     }
   }
   ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
@@ -867,7 +871,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
   if (Depth == 10)
     return 0;
 
-  // If multiple users are using the root value, procede with
+  // If multiple users are using the root value, proceed with
   // simplification conservatively assuming that all elements
   // are needed.
   if (!V->hasOneUse()) {
@@ -1108,21 +1112,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       Value *LHS = II->getArgOperand(0);
       Value *RHS = II->getArgOperand(1);
       // Extract the element as scalars.
-      LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
+      LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
         ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
-      RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS,
+      RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
        ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
 
      switch (II->getIntrinsicID()) {
      default: llvm_unreachable("Case stmts out of sync!");
      case Intrinsic::x86_sse_sub_ss:
      case Intrinsic::x86_sse2_sub_sd:
-        TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
+        TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
                                                        II->getName()), *II);
        break;
      case Intrinsic::x86_sse_mul_ss:
      case Intrinsic::x86_sse2_mul_sd:
-        TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS,
+        TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
                                                        II->getName()), *II);
        break;
      }
@@ -1132,7 +1136,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
         UndefValue::get(II->getType()), TmpV,
         ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
         II->getName());
-      InsertNewInstBefore(New, *II);
+      InsertNewInstWith(New, *II);
       return New;
     }
   }
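The two crc32 cases added above record that the 64-bit SSE4.2 crc32 forms always leave a 32-bit CRC zero-extended into the 64-bit result, which is what APInt::getHighBitsSet(64, 32) expresses as known-zero bits. A small sanity check of that mask, independent of the intrinsic itself:

    #include <cassert>
    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    int main() {
      // Bits [32,64) set: the positions known to be zero in a value that is
      // really a 32-bit CRC living in a 64-bit register.
      APInt KnownZero = APInt::getHighBitsSet(64, 32);
      assert(KnownZero.countLeadingOnes() == 32);
      assert(KnownZero.countTrailingZeros() == 32);
      return 0;
    }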
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7a84598..92c10f5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -240,9 +240,9 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
       Constant *C2 = cast<Constant>(Op1->getOperand(1));
 
       Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
-      Instruction *New = BinaryOperator::Create(Opcode, A, B, Op1->getName(),
-                                                &I);
-      Worklist.Add(New);
+      Instruction *New = BinaryOperator::Create(Opcode, A, B);
+      InsertNewInstWith(New, I);
+      New->takeName(Op1);
       I.setOperand(0, New);
       I.setOperand(1, Folded);
       // Conservatively clear the optional flags, since they may not be
@@ -599,7 +599,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   }
 
   // Okay, we can do the transformation: create the new PHI node.
-  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues(), "");
+  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
   InsertNewInstBefore(NewPN, *PN);
   NewPN->takeName(PN);
@@ -1088,8 +1088,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
   // free undef -> unreachable.
   if (isa<UndefValue>(Op)) {
     // Insert a new store to null because we cannot modify the CFG here.
-    new StoreInst(ConstantInt::getTrue(FI.getContext()),
-           UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI);
+    Builder->CreateStore(ConstantInt::getTrue(FI.getContext()),
+                         UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
     return EraseInstFromFunction(FI);
   }
@@ -1261,7 +1261,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       case Intrinsic::sadd_with_overflow:
         if (*EV.idx_begin() == 0) {  // Normal result.
           Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-          II->replaceAllUsesWith(UndefValue::get(II->getType()));
+          ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
           EraseInstFromFunction(*II);
           return BinaryOperator::CreateAdd(LHS, RHS);
         }
@@ -1278,7 +1278,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       case Intrinsic::ssub_with_overflow:
         if (*EV.idx_begin() == 0) {  // Normal result.
           Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-          II->replaceAllUsesWith(UndefValue::get(II->getType()));
+          ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
           EraseInstFromFunction(*II);
           return BinaryOperator::CreateSub(LHS, RHS);
         }
@@ -1287,7 +1287,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       case Intrinsic::smul_with_overflow:
         if (*EV.idx_begin() == 0) {  // Normal result.
           Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-          II->replaceAllUsesWith(UndefValue::get(II->getType()));
+          ReplaceInstUsesWith(*II, UndefValue::get(II->getType()));
           EraseInstFromFunction(*II);
           return BinaryOperator::CreateMul(LHS, RHS);
         }
@@ -1385,8 +1385,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
   Worklist.push_back(BB);
 
   SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
-  SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
-
+  DenseMap<ConstantExpr*, Constant*> FoldedConstants;
+
   do {
     BB = Worklist.pop_back_val();
@@ -1421,14 +1421,15 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
            i != e; ++i) {
         ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
         if (CE == 0) continue;
-
-        // If we already folded this constant, don't try again.
-        if (!FoldedConstants.insert(CE))
-          continue;
-
-        Constant *NewC = ConstantFoldConstantExpression(CE, TD);
-        if (NewC && NewC != CE) {
-          *i = NewC;
+
+        Constant*& FoldRes = FoldedConstants[CE];
+        if (!FoldRes)
+          FoldRes = ConstantFoldConstantExpression(CE, TD);
+        if (!FoldRes)
+          FoldRes = CE;
+
+        if (FoldRes != CE) {
+          *i = FoldRes;
           MadeIRChange = true;
         }
       }
@@ -1575,6 +1576,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
 
     // Now that we have an instruction, try combining it to simplify it.
     Builder->SetInsertPoint(I->getParent(), I);
+    Builder->SetCurrentDebugLocation(I->getDebugLoc());
 
 #ifndef NDEBUG
     std::string OrigI;
@@ -1589,7 +1591,8 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
         DEBUG(errs() << "IC: Old = " << *I << '\n'
                      << "    New = " << *Result << '\n');
 
-        Result->setDebugLoc(I->getDebugLoc());
+        if (!I->getDebugLoc().isUnknown())
+          Result->setDebugLoc(I->getDebugLoc());
         // Everything uses the new instruction now.
         I->replaceAllUsesWith(Result);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 2425342..b902213 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -40,15 +40,15 @@ using namespace llvm;
 
 namespace {
   class GCOVProfiler : public ModulePass {
-    bool runOnModule(Module &M);
   public:
     static char ID;
     GCOVProfiler()
-        : ModulePass(ID), EmitNotes(true), EmitData(true) {
+        : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) {
       initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
     }
-    GCOVProfiler(bool EmitNotes, bool EmitData)
-        : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData) {
+    GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false)
+        : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
+          Use402Format(use402Format) {
       assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
       initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
     }
@@ -57,6 +57,8 @@ namespace {
     }
 
   private:
+    bool runOnModule(Module &M);
+
     // Create the GCNO files for the Module based on DebugInfo.
     void emitGCNO(DebugInfoFinder &DIF);
@@ -86,10 +88,13 @@ namespace {
    // list.
    void insertCounterWriteout(DebugInfoFinder &,
                               SmallVector<std::pair<GlobalVariable *,
-                                                     uint32_t>, 8> &);
+                                                     MDNode *>, 8> &);
+
+    std::string mangleName(DICompileUnit CU, std::string NewStem);
 
     bool EmitNotes;
     bool EmitData;
+    bool Use402Format;
 
     Module *M;
     LLVMContext *Ctx;
@@ -100,8 +105,9 @@ char GCOVProfiler::ID = 0;
 INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
                 "Insert instrumentation for GCOV profiling", false, false)
 
-ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData) {
-  return new GCOVProfiler(EmitNotes, EmitData);
+ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
+                                         bool Use402Format) {
+  return new GCOVProfiler(EmitNotes, EmitData, Use402Format);
 }
 
 static DISubprogram findSubprogram(DIScope Scope) {
@@ -137,7 +143,7 @@ namespace {
       // A GCOV string is a length, followed by a NUL, then between 0 and 3 NULs
       // padding out to the next 4-byte word. The length is measured in 4-byte
      // words including padding, not bytes of actual string.
-      return (s.size() + 5) / 4;
+      return (s.size() / 4) + 1;
    }
 
    void writeGCOVString(StringRef s) {
@@ -247,7 +253,7 @@ namespace {
   // object users can construct, the blocks and lines will be rooted here.
   class GCOVFunction : public GCOVRecord {
   public:
-    GCOVFunction(DISubprogram SP, raw_ostream *os) {
+    GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) {
       this->os = os;
 
       Function *F = SP.getFunction();
@@ -260,10 +266,14 @@ namespace {
       writeBytes(FunctionTag, 4);
       uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
           1 + lengthOfGCOVString(SP.getFilename()) + 1;
+      if (!Use402Format)
+        ++BlockLen; // For second checksum.
       write(BlockLen);
       uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
       write(Ident);
-      write(0);  // checksum
+      write(0);  // checksum #1
+      if (!Use402Format)
+        write(0);  // checksum #2
       writeGCOVString(SP.getName());
       writeGCOVString(SP.getFilename());
       write(SP.getLineNumber());
@@ -318,9 +328,25 @@ namespace {
   };
 }
 
-// Replace the stem of a file, or add one if missing.
-static std::string replaceStem(std::string OrigFilename, std::string NewStem) {
-  return (sys::path::stem(OrigFilename) + "." + NewStem).str();
+std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) {
+  if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
+    for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
+      MDNode *N = GCov->getOperand(i);
+      if (N->getNumOperands() != 2) continue;
+      MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
+      MDNode *CompileUnit = dyn_cast<MDNode>(N->getOperand(1));
+      if (!GCovFile || !CompileUnit) continue;
+      if (CompileUnit == CU) {
+        SmallString<128> Filename = GCovFile->getString();
+        sys::path::replace_extension(Filename, NewStem);
+        return Filename.str();
+      }
+    }
+  }
+
+  SmallString<128> Filename = CU.getFilename();
+  sys::path::replace_extension(Filename, NewStem);
+  return sys::path::filename(Filename.str());
 }
 
 bool GCOVProfiler::runOnModule(Module &M) {
@@ -346,9 +372,12 @@ void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) {
       DICompileUnit CU(*I);
       raw_fd_ostream *&out = GcnoFiles[CU];
       std::string ErrorInfo;
-      out = new raw_fd_ostream(replaceStem(CU.getFilename(), "gcno").c_str(),
-                               ErrorInfo, raw_fd_ostream::F_Binary);
-      out->write("oncg*404MVLL", 12);
+      out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
+                               raw_fd_ostream::F_Binary);
+      if (!Use402Format)
+        out->write("oncg*404MVLL", 12);
+      else
+        out->write("oncg*402MVLL", 12);
     }
 
     for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
@@ -358,7 +387,7 @@ void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) {
       Function *F = SP.getFunction();
       if (!F) continue;
-      GCOVFunction Func(SP, os);
+      GCOVFunction Func(SP, os, Use402Format);
 
       for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
         GCOVBlock &Block = Func.getBlock(BB);
@@ -399,7 +428,7 @@ bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) {
   if (DIF.subprogram_begin() == DIF.subprogram_end())
     return false;
 
-  SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> CountersByIdent;
+  SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
 
   for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
          SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
     DISubprogram SP(*SPI);
@@ -422,8 +451,7 @@ bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) {
         GlobalValue::InternalLinkage,
         Constant::getNullValue(CounterTy),
         "__llvm_gcov_ctr", 0, false, 0);
-    CountersByIdent.push_back(
-        std::make_pair(Counters, reinterpret_cast<intptr_t>((MDNode*)SP)));
+    CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
 
     UniqueVector<BasicBlock *> ComplexEdgePreds;
     UniqueVector<BasicBlock *> ComplexEdgeSuccs;
@@ -490,7 +518,7 @@ bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) {
     }
   }
 
-  insertCounterWriteout(DIF, CountersByIdent);
+  insertCounterWriteout(DIF, CountersBySP);
 
   return true;
 }
@@ -561,7 +589,10 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
 }
 
 Constant *GCOVProfiler::getEmitFunctionFunc() {
-  const Type *Args[] = { Type::getInt32Ty(*Ctx) };
+  const Type *Args[2] = {
+    Type::getInt32Ty(*Ctx),    // uint32_t ident
+    Type::getInt8PtrTy(*Ctx),  // const char *function_name
+  };
   const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
                                               Args, false);
   return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
@@ -597,7 +628,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
 
 void GCOVProfiler::insertCounterWriteout(
     DebugInfoFinder &DIF,
-    SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> &CountersByIdent) {
+    SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) {
  const FunctionType *WriteoutFTy =
      FunctionType::get(Type::getVoidTy(*Ctx), false);
  Function *WriteoutF = Function::Create(WriteoutFTy,
@@ -615,14 +646,18 @@ void GCOVProfiler::insertCounterWriteout(
   for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(),
            CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) {
     DICompileUnit compile_unit(*CUI);
-    std::string FilenameGcda = replaceStem(compile_unit.getFilename(), "gcda");
+    std::string FilenameGcda = mangleName(compile_unit, "gcda");
     Builder.CreateCall(StartFile,
                        Builder.CreateGlobalStringPtr(FilenameGcda));
-    for (SmallVector<std::pair<GlobalVariable *, uint32_t>, 8>::iterator
-             I = CountersByIdent.begin(), E = CountersByIdent.end();
+    for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator
+             I = CountersBySP.begin(), E = CountersBySP.end();
          I != E; ++I) {
-      Builder.CreateCall(EmitFunction, ConstantInt::get(Type::getInt32Ty(*Ctx),
-                                                        I->second));
+      DISubprogram SP(I->second);
+      intptr_t ident = reinterpret_cast<intptr_t>(I->second);
+      Builder.CreateCall2(EmitFunction,
+                          ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
+                          Builder.CreateGlobalStringPtr(SP.getName()));
+
       GlobalVariable *GV = I->first;
       unsigned Arcs =
           cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
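The lengthOfGCOVString change above fixes an off-by-one word count. A GCOV string occupies its characters plus a terminating NUL, padded out to a 4-byte word, and the recorded length counts those words: for a 3-character string that is one word, yet the old (s.size() + 5) / 4 reported two. The corrected s.size() / 4 + 1 equals ceil((size + 1) / 4) for every size, as this plain C++ check confirms:

    #include <cassert>
    #include <cstddef>

    // len / 4 + 1 == ceil((len + 1) / 4): chars plus NUL, in 4-byte words.
    static size_t gcovStringWords(size_t len) { return len / 4 + 1; }

    int main() {
      for (size_t len = 0; len < 64; ++len)
        assert(gcovStringWords(len) == (len + 1 + 3) / 4);
      return 0;
    }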
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
index 6b3f12d..182a43d 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -1351,8 +1351,6 @@ bool PathProfiler::runOnModule(Module &M) {
     return false;
   }
 
-  BasicBlock::iterator insertPoint = Main->getEntryBlock().getFirstNonPHI();
-
   llvmIncrementHashFunction = M.getOrInsertFunction(
     "llvm_increment_path_count",
     Type::getVoidTy(*Context), // return type
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 0184390..0af14ed 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -147,7 +147,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   if (!DisableBranchOpts) {
     MadeChange = false;
     for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
-      MadeChange |= ConstantFoldTerminator(BB);
+      MadeChange |= ConstantFoldTerminator(BB, true);
 
     if (MadeChange)
       ModifiedDT = true;
@@ -371,9 +371,11 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
   // If these values will be promoted, find out what they will be promoted
   // to.  This helps us consider truncates on PPC as noop copies when they
   // are.
-  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
+  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
+      TargetLowering::TypePromoteInteger)
     SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
-  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
+  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
+      TargetLowering::TypePromoteInteger)
     DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
 
   // If, after promotion, these are the same types, this is a noop copy.
@@ -548,7 +550,23 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
 
   // From here on out we're working with named functions.
   if (CI->getCalledFunction() == 0) return false;
-
+
+  // If llvm.dbg.value is far away from the value, isel may not be able to
+  // handle it properly: isel will drop llvm.dbg.value if it cannot find a
+  // node corresponding to the value.
+  if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI))
+    if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()))
+      if (!VI->isTerminator() &&
+          (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) {
+        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+        DVI->removeFromParent();
+        if (isa<PHINode>(VI))
+          DVI->insertBefore(VI->getParent()->getFirstNonPHI());
+        else
+          DVI->insertAfter(VI);
+        return true;
+      }
+
   // We'll need TargetData from here on out.
   const TargetData *TD = TLI ? TLI->getTargetData() : 0;
   if (!TD) return false;
MemDepResult Dep = MD->getDependency(L); @@ -2099,6 +2107,7 @@ bool GVN::performPRE(Function &F) { PREInstr->insertBefore(PREPred->getTerminator()); PREInstr->setName(CurInst->getName() + ".pre"); + PREInstr->setDebugLoc(CurInst->getDebugLoc()); predMap[PREPred] = PREInstr; VN.add(PREInstr, ValNo); ++NumGVNPRE; @@ -2118,7 +2127,7 @@ bool GVN::performPRE(Function &F) { VN.add(Phi, ValNo); addToLeaderTable(ValNo, Phi, CurrentBlock); - + Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); if (Phi->getType()->isPointerTy()) { // Because we have added a PHI-use of the pointer value, it has now diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 09d569a..04ee7c8 100644 --- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -52,20 +52,30 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumRemoved , "Number of aux indvars removed"); +STATISTIC(NumWidened , "Number of indvars widened"); STATISTIC(NumInserted, "Number of canonical indvars added"); STATISTIC(NumReplaced, "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); +STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); +STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); + +// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis +// and referenced here. 
+namespace llvm { + extern bool DisableIVRewrite; +} namespace { class IndVarSimplify : public LoopPass { @@ -73,12 +83,13 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; + TargetData *TD; SmallVector<WeakVH, 16> DeadInsts; bool Changed; public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(ID) { + IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) { initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); } @@ -101,15 +112,18 @@ namespace { private: bool isValidRewrite(Value *FromVal, Value *ToVal); - void EliminateIVComparisons(); - void EliminateIVRemainders(); + void SimplifyIVUsers(SCEVExpander &Rewriter); + void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); + void EliminateIVRemainder(BinaryOperator *Rem, + Value *IVOperand, + bool IsSigned, + PHINode *IVPhi); void RewriteNonIntegerIVs(Loop *L); ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - PHINode *IndVar, - BasicBlock *ExitingBlock, - BranchInst *BI, - SCEVExpander &Rewriter); + PHINode *IndVar, + SCEVExpander &Rewriter); + void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); @@ -122,7 +136,7 @@ namespace { char IndVarSimplify::ID = 0; INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", - "Canonicalize Induction Variables", false, false) + "Induction Variable Simplification", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) @@ -130,7 +144,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(IVUsers) INITIALIZE_PASS_END(IndVarSimplify, "indvars", - "Canonicalize Induction Variables", false, false) + "Induction Variable Simplification", false, false) Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); @@ -183,17 +197,23 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { return true; } -/// LinearFunctionTestReplace - This method rewrites the exit condition of the -/// loop to be a canonical != comparison against the incremented loop induction -/// variable. This pass is able to rewrite the exit tests of any loop where the -/// SCEV analysis can determine a loop-invariant trip count of the loop, which -/// is actually a much broader range than just linear tests. -ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, - const SCEV *BackedgeTakenCount, - PHINode *IndVar, - BasicBlock *ExitingBlock, - BranchInst *BI, - SCEVExpander &Rewriter) { +/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken +/// count expression can be safely and cheaply expanded into an instruction +/// sequence that can be used by LinearFunctionTestReplace. +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || + BackedgeTakenCount->isZero()) + return false; + + if (!L->getExitingBlock()) + return false; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return false; + // Special case: If the backedge-taken count is a UDiv, it's very likely a // UDiv that ScalarEvolution produced in order to compute a precise // expression, rather than a UDiv from the user's code. 
If we can't find a @@ -201,23 +221,68 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // rewriting the loop. if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); - if (!OrigCond) return 0; + if (!OrigCond) return false; const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); if (R != BackedgeTakenCount) { const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); if (L != BackedgeTakenCount) - return 0; + return false; } } + return true; +} + +/// getBackedgeIVType - Get the widest type used by the loop test after peeking +/// through Truncs. +/// +/// TODO: Unnecessary once LinearFunctionTestReplace is removed. +static const Type *getBackedgeIVType(Loop *L) { + if (!L->getExitingBlock()) + return 0; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return 0; + + ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cond) + return 0; + + const Type *Ty = 0; + for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); + OI != OE; ++OI) { + assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); + TruncInst *Trunc = dyn_cast<TruncInst>(*OI); + if (!Trunc) + continue; + + return Trunc->getSrcTy(); + } + return Ty; +} + +/// LinearFunctionTestReplace - This method rewrites the exit condition of the +/// loop to be a canonical != comparison against the incremented loop induction +/// variable. This pass is able to rewrite the exit tests of any loop where the +/// SCEV analysis can determine a loop-invariant trip count of the loop, which +/// is actually a much broader range than just linear tests. +ICmpInst *IndVarSimplify:: +LinearFunctionTestReplace(Loop *L, + const SCEV *BackedgeTakenCount, + PHINode *IndVar, + SCEVExpander &Rewriter) { + assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); + BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); // If the exiting block is not the same as the backedge block, we must compare // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. Value *CmpIndVar; const SCEV *RHS = BackedgeTakenCount; - if (ExitingBlock == L->getLoopLatch()) { + if (L->getExitingBlock() == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. // If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. @@ -240,7 +305,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. - CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock); + CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); } else { // We have to use the preincremented value... RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, @@ -418,96 +483,519 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { SE->forgetLoop(L); } -void IndVarSimplify::EliminateIVComparisons() { - // Look for ICmp users. 
-  for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
-    IVStrideUse &UI = *I;
-    ICmpInst *ICmp = dyn_cast<ICmpInst>(UI.getUser());
-    if (!ICmp) continue;
-
-    bool Swapped = UI.getOperandValToReplace() == ICmp->getOperand(1);
-    ICmpInst::Predicate Pred = ICmp->getPredicate();
-    if (Swapped) Pred = ICmpInst::getSwappedPredicate(Pred);
-
-    // Get the SCEVs for the ICmp operands.
-    const SCEV *S = IU->getReplacementExpr(UI);
-    const SCEV *X = SE->getSCEV(ICmp->getOperand(!Swapped));
-
-    // Simplify unnecessary loops away.
-    const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
-    S = SE->getSCEVAtScope(S, ICmpLoop);
-    X = SE->getSCEVAtScope(X, ICmpLoop);
-
-    // If the condition is always true or always false, replace it with
-    // a constant value.
-    if (SE->isKnownPredicate(Pred, S, X))
-      ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
-    else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
-      ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
-    else
-      continue;
+namespace {
+  // Collect information about induction variables that are used by sign/zero
+  // extend operations. This information is recorded by CollectExtend and
+  // provides the input to WidenIV.
+  struct WideIVInfo {
+    const Type *WidestNativeType; // Widest integer type created by [sz]ext
+    bool IsSigned;                // Was an sext user seen before a zext?
+
+    WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
+  };
+  typedef std::map<PHINode *, WideIVInfo> WideIVMap;
+}
+
+/// CollectExtend - Update information about the induction variable that is
+/// extended by this sign or zero extend operation. This is used to determine
+/// the final width of the IV before actually widening it.
+static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned,
+                          WideIVMap &IVMap, ScalarEvolution *SE,
+                          const TargetData *TD) {
+  const Type *Ty = Cast->getType();
+  uint64_t Width = SE->getTypeSizeInBits(Ty);
+  if (TD && !TD->isLegalInteger(Width))
+    return;
 
-    DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
-    DeadInsts.push_back(ICmp);
+  WideIVInfo &IVInfo = IVMap[Phi];
+  if (!IVInfo.WidestNativeType) {
+    IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+    IVInfo.IsSigned = IsSigned;
+    return;
   }
+
+  // We extend the IV to satisfy the sign of its first user, arbitrarily.
+  if (IVInfo.IsSigned != IsSigned)
+    return;
+
+  if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType))
+    IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
 }
 
-void IndVarSimplify::EliminateIVRemainders() {
-  // Look for SRem and URem users.
-  for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
-    IVStrideUse &UI = *I;
-    BinaryOperator *Rem = dyn_cast<BinaryOperator>(UI.getUser());
-    if (!Rem) continue;
+namespace {
+/// WidenIV - The goal of this transform is to remove sign and zero extends
+/// without creating any new induction variables. To do this, it creates a new
+/// phi of the wider type and redirects all users, either removing extends or
+/// inserting truncs whenever we stop propagating the type.
+/// +class WidenIV { + PHINode *OrigPhi; + const Type *WideType; + bool IsSigned; + + IVUsers *IU; + LoopInfo *LI; + Loop *L; + ScalarEvolution *SE; + DominatorTree *DT; + SmallVectorImpl<WeakVH> &DeadInsts; + + PHINode *WidePhi; + Instruction *WideInc; + const SCEV *WideIncExpr; + + SmallPtrSet<Instruction*,16> Processed; + +public: + WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers, + LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, + SmallVectorImpl<WeakVH> &DI) : + OrigPhi(PN), + WideType(IVInfo.WidestNativeType), + IsSigned(IVInfo.IsSigned), + IU(IUsers), + LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), + SE(SEv), + DT(DTree), + DeadInsts(DI), + WidePhi(0), + WideInc(0), + WideIncExpr(0) { + assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); + } - bool isSigned = Rem->getOpcode() == Instruction::SRem; - if (!isSigned && Rem->getOpcode() != Instruction::URem) - continue; + bool CreateWideIV(SCEVExpander &Rewriter); - // We're only interested in the case where we know something about - // the numerator. - if (UI.getOperandValToReplace() != Rem->getOperand(0)) - continue; +protected: + Instruction *CloneIVUser(Instruction *NarrowUse, + Instruction *NarrowDef, + Instruction *WideDef); - // Get the SCEVs for the ICmp operands. - const SCEV *S = SE->getSCEV(Rem->getOperand(0)); - const SCEV *X = SE->getSCEV(Rem->getOperand(1)); - - // Simplify unnecessary loops away. - const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - // i % n --> i if i is in [0,n). - if ((!isSigned || SE->isKnownNonNegative(S)) && - SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - S, X)) - Rem->replaceAllUsesWith(Rem->getOperand(0)); - else { - // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). - const SCEV *LessOne = - SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); - if ((!isSigned || SE->isKnownNonNegative(LessOne)) && - SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - LessOne, X)) { - ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, - Rem->getOperand(0), Rem->getOperand(1), - "tmp"); - SelectInst *Sel = - SelectInst::Create(ICmp, - ConstantInt::get(Rem->getType(), 0), - Rem->getOperand(0), "tmp", Rem); - Rem->replaceAllUsesWith(Sel); - } else + const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); + + Instruction *WidenIVUse(Instruction *NarrowUse, + Instruction *NarrowDef, + Instruction *WideDef); +}; +} // anonymous namespace + +/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this +/// loop. IVUsers is treated as a worklist. Each successive simplification may +/// push more users which may themselves be candidates for simplification. +/// +void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { + WideIVMap IVMap; + + // Each round of simplification involves a round of eliminating operations + // followed by a round of widening IVs. A single IVUsers worklist is used + // across all rounds. The inner loop advances the user. If widening exposes + // more uses, then another pass through the outer loop is triggered. 
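For intuition about what the widening machinery below buys: the sign/zero extends it removes typically come from 32-bit counters used in 64-bit address arithmetic. An illustrative C fragment (not from this patch; the names are made up):

    // Illustrative only: on an LP64 target, 'i' would otherwise be
    // sign-extended to 64 bits for the address computation on every
    // iteration.
    void scale(double *a, int n) {
      for (int i = 0; i < n; ++i)
        a[i] *= 2.0;   // a[i] is *(a + (int64_t)i); widening the IV to
                       // 64 bits lets the sext disappear entirely.
    }

The worklist code that follows implements the rounds described in the comment above.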
+  for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) {
+    for (; I != E; ++I) {
+      Instruction *UseInst = I->getUser();
+      Value *IVOperand = I->getOperandValToReplace();
+
+      if (DisableIVRewrite) {
+        if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+          bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+          if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+            CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD);
+            continue;
+          }
+        }
+      }
+      if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+        EliminateIVComparison(ICmp, IVOperand);
+        continue;
+      }
+      if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+        bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+        if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+          EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi());
+          continue;
+        }
+      }
+    }
+    for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end();
+         I != E; ++I) {
+      WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts);
+      if (Widener.CreateWideIV(Rewriter))
+        Changed = true;
     }
+  }
+}
 
-    // Inform IVUsers about the new users.
-    if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
-      IU->AddUsersIfInteresting(I);
+static Value *getExtend(Value *NarrowOper, const Type *WideType,
+                        bool IsSigned, IRBuilder<> &Builder) {
+  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
+                    Builder.CreateZExt(NarrowOper, WideType);
+}
 
-    DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
-    DeadInsts.push_back(Rem);
+/// CloneIVUser - Instantiate a wide operation to replace a narrow
+/// operation. This only needs to handle operations that can evaluate to
+/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
+Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
+                                  Instruction *NarrowDef,
+                                  Instruction *WideDef) {
+  unsigned Opcode = NarrowUse->getOpcode();
+  switch (Opcode) {
+  default:
+    return 0;
+  case Instruction::Add:
+  case Instruction::Mul:
+  case Instruction::UDiv:
+  case Instruction::Sub:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+    DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n");
+
+    IRBuilder<> Builder(NarrowUse);
+
+    // Replace NarrowDef operands with WideDef. Otherwise, we don't know
+    // anything about the narrow operand yet so must insert a [sz]ext. It is
+    // probably loop invariant and will be folded or hoisted. If it actually
+    // comes from a widened IV, it should be removed during a future call to
+    // WidenIVUse.
+    Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef :
+      getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder);
+    Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef :
+      getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder);
+
+    BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse);
+    BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
+                                                    LHS, RHS,
+                                                    NarrowBO->getName());
+    Builder.Insert(WideBO);
+    if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+    if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+
+    return WideBO;
+  }
+  llvm_unreachable(0);
+}
+
+// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+// perspective after widening its type? In other words, can the extend be
+// safely hoisted out of the loop with SCEV reducing the value to a recurrence
+// on the same loop. If so, return the sign or zero extended
+// recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+  if (!SE->isSCEVable(NarrowUse->getType()))
+    return 0;
+
+  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+  const SCEV *WideExpr = IsSigned ?
+    SE->getSignExtendExpr(NarrowExpr, WideType) :
+    SE->getZeroExtendExpr(NarrowExpr, WideType);
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+  if (!AddRec || AddRec->getLoop() != L)
+    return 0;
+
+  return AddRec;
+}
+
+/// HoistStep - Attempt to hoist an IV increment above a potential use.
+///
+/// To successfully hoist, two criteria must be met:
+/// - IncV operands dominate InsertPos and
+/// - InsertPos dominates IncV
+///
+/// Meeting the second condition means that we don't need to check all of IncV's
+/// existing uses (it's moving up in the domtree).
+///
+/// This does not yet recursively hoist the operands, although that would
+/// not be difficult.
+static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
+                      const DominatorTree *DT)
+{
+  if (DT->dominates(IncV, InsertPos))
+    return true;
+
+  if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
+    return false;
+
+  if (IncV->mayHaveSideEffects())
+    return false;
+
+  // Attempt to hoist IncV
+  for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
+       OI != OE; ++OI) {
+    Instruction *OInst = dyn_cast<Instruction>(OI);
+    if (OInst && !DT->dominates(OInst, InsertPos))
+      return false;
+  }
+  IncV->moveBefore(InsertPos);
+  return true;
+}
+
+/// WidenIVUse - Determine whether an individual user of the narrow IV can be
+/// widened. If so, return the wide clone of the user.
+Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
+                                 Instruction *NarrowDef,
+                                 Instruction *WideDef) {
+  // To be consistent with IVUsers, stop traversing the def-use chain at
+  // inner-loop phis or post-loop phis.
+  if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
+    return 0;
+
+  // Handle data flow merges and bizarre phi cycles.
+  if (!Processed.insert(NarrowUse))
+    return 0;
+
+  // Our raison d'etre! Eliminate sign and zero extension.
+  if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) {
+    Value *NewDef = WideDef;
+    if (NarrowUse->getType() != WideType) {
+      unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType());
+      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+      if (CastWidth < IVWidth) {
+        // The cast isn't as wide as the IV, so insert a Trunc.
+        IRBuilder<> Builder(NarrowUse);
+        NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
+      }
+      else {
+        // A wider extend was hidden behind a narrower one. This may induce
+        // another round of IV widening in which the intermediate IV becomes
+        // dead. It should be very rare.
+        DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
+              << " not wide enough to subsume " << *NarrowUse << "\n");
+        NarrowUse->replaceUsesOfWith(NarrowDef, WideDef);
+        NewDef = NarrowUse;
+      }
+    }
+    if (NewDef != NarrowUse) {
+      DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse
+            << " replaced by " << *WideDef << "\n");
+      ++NumElimExt;
+      NarrowUse->replaceAllUsesWith(NewDef);
+      DeadInsts.push_back(NarrowUse);
+    }
+    // Now that the extend is gone, expose its uses to IVUsers for potential
+    // further simplification within SimplifyIVUsers.
+    IU->AddUsersIfInteresting(WideDef, WidePhi);
+
+    // No further widening is needed. The deceased [sz]ext had done it for us.
+    return 0;
+  }
+  const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
+  if (!WideAddRec) {
+    // This user does not evaluate to a recurrence after widening, so don't
+    // follow it. Instead insert a Trunc to kill off the original use,
+    // eventually isolating the original narrow IV so it can be removed.
+    IRBuilder<> Builder(NarrowUse);
+    Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
+    NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
+    return 0;
+  }
+  // Reuse the IV increment that SCEVExpander created as long as it dominates
+  // NarrowUse.
+  Instruction *WideUse = 0;
+  if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) {
+    WideUse = WideInc;
+  }
+  else {
+    WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef);
+    if (!WideUse)
+      return 0;
+  }
+  // GetWideRecurrence ensured that the narrow expression could be extended
+  // outside the loop without overflow. This suggests that the wide use
+  // evaluates to the same expression as the extended narrow use, but doesn't
+  // absolutely guarantee it. Hence the following failsafe check. In rare cases
+  // where it fails, we simply throw away the newly created wide use.
+  if (WideAddRec != SE->getSCEV(WideUse)) {
+    DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
+          << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
+    DeadInsts.push_back(WideUse);
+    return 0;
+  }
+
+  // Returning WideUse pushes it on the worklist.
+  return WideUse;
+}
+
+/// CreateWideIV - Process a single induction variable. First use the
+/// SCEVExpander to create a wide induction variable that evaluates to the same
+/// recurrence as the original narrow IV. Then use a worklist to forward
+/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
+/// interesting IV users, the narrow IV will be isolated for removal by
+/// DeleteDeadPHIs.
+///
+/// It would be simpler to delete uses as they are processed, but we must avoid
+/// invalidating SCEV expressions.
+///
+bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+  // Is this phi an induction variable?
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
+  if (!AddRec)
+    return false;
+
+  // Widen the induction variable expression.
+  const SCEV *WideIVExpr = IsSigned ?
+    SE->getSignExtendExpr(AddRec, WideType) :
+    SE->getZeroExtendExpr(AddRec, WideType);
+
+  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
+         "Expect the new IV expression to preserve its type");
+
+  // Can the IV be extended outside the loop without overflow?
+  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
+  if (!AddRec || AddRec->getLoop() != L)
+    return false;
+
+  // An AddRec must have loop-invariant operands. Since this AddRec is
+  // materialized by a loop header phi, the expression cannot have any post-loop
+  // operands, so they must dominate the loop header.
+  assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
+         SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
+         && "Loop header phi recurrence inputs do not dominate the loop");
+
+  // The rewriter provides a value for the desired IV expression. This may
+  // either find an existing phi or materialize a new one. Either way, we
+  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
+  // of the phi-SCC dominates the loop entry.
+  Instruction *InsertPt = L->getHeader()->begin();
+  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
+
+  // Remembering the WideIV increment generated by SCEVExpander allows
+  // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
+  // employ a general reuse mechanism because the call above is the only call to
+  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
+  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+    WideInc =
+      cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+    WideIncExpr = SE->getSCEV(WideInc);
+  }
+
+  DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
+  ++NumWidened;
+
+  // Traverse the def-use chain using a worklist starting at the original IV.
+  assert(Processed.empty() && "expect initial state");
+
+  // Each worklist entry has a Narrow def-use link and Wide def.
+  SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
+  for (Value::use_iterator UI = OrigPhi->use_begin(),
+         UE = OrigPhi->use_end(); UI != UE; ++UI) {
+    NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi));
+  }
+  while (!NarrowIVUsers.empty()) {
+    Use *NarrowDefUse;
+    Instruction *WideDef;
+    tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val();
+
+    // Process a def-use edge. This may replace the use, so don't hold a
+    // use_iterator across it.
+    Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get());
+    Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser());
+    Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef);
+
+    // Follow all def-use edges from the previous narrow use.
+    if (WideUse) {
+      for (Value::use_iterator UI = NarrowUse->use_begin(),
+             UE = NarrowUse->use_end(); UI != UE; ++UI) {
+        NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse));
+      }
+    }
+    // WidenIVUse may have removed the def-use edge.
+    if (NarrowDef->use_empty())
+      DeadInsts.push_back(NarrowDef);
+  }
+  return true;
+}
+
+void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+  unsigned IVOperIdx = 0;
+  ICmpInst::Predicate Pred = ICmp->getPredicate();
+  if (IVOperand != ICmp->getOperand(0)) {
+    // Swapped
+    assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+    IVOperIdx = 1;
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  // Get the SCEVs for the ICmp operands.
+  const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
+  const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
+
+  // Simplify unnecessary loops away.
+  const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+  S = SE->getSCEVAtScope(S, ICmpLoop);
+  X = SE->getSCEVAtScope(X, ICmpLoop);
+
+  // If the condition is always true or always false, replace it with
+  // a constant value.
+  if (SE->isKnownPredicate(Pred, S, X))
+    ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+  else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+    ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+  else
+    return;
+
+  DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+  ++NumElimCmp;
+  Changed = true;
+  DeadInsts.push_back(ICmp);
+}
+
+void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
+                                          Value *IVOperand,
+                                          bool IsSigned,
+                                          PHINode *IVPhi) {
+  // We're only interested in the case where we know something about
+  // the numerator.
+  if (IVOperand != Rem->getOperand(0))
+    return;
+
+  // Get the SCEVs for the Rem operands.
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0)); + const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // i % n --> i if i is in [0,n). + if ((!IsSigned || SE->isKnownNonNegative(S)) && + SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = + SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + if (IsSigned && !SE->isKnownNonNegative(LessOne)) + return; + + if (!SE->isKnownPredicate(IsSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) + return; + + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1), + "tmp"); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } + + // Inform IVUsers about the new users. + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I, IVPhi); + + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.push_back(Rem); } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -526,6 +1014,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<TargetData>(); + DeadInsts.clear(); Changed = false; @@ -533,11 +1023,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // transform them to use integer recurrences. RewriteNonIntegerIVs(L); - BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE); + if (DisableIVRewrite) + Rewriter.disableCanonicalMode(); // Check to see if this loop has a computable loop-invariant execution count. // If so, this means that we can compute the final value of any expressions @@ -548,33 +1039,42 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); - // Simplify ICmp IV users. - EliminateIVComparisons(); - - // Simplify SRem and URem IV users. - EliminateIVRemainders(); + // Eliminate redundant IV users. + SimplifyIVUsers(Rewriter); // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. const Type *LargestType = 0; bool NeedCannIV = false; - if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { - LargestType = BackedgeTakenCount->getType(); - LargestType = SE->getEffectiveSCEVType(LargestType); + bool ExpandBECount = canExpandBackedgeTakenCount(L, SE); + if (ExpandBECount) { // If we have a known trip count and a single exit block, we'll be // rewriting the loop exit test condition below, which requires a // canonical induction variable. 
- if (ExitingBlock) - NeedCannIV = true; - } - for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { - const Type *Ty = - SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); + NeedCannIV = true; + const Type *Ty = BackedgeTakenCount->getType(); + if (DisableIVRewrite) { + // In this mode, SimplifyIVUsers may have already widened the IV used by + // the backedge test and inserted a Trunc on the compare's operand. Get + // the wider type to avoid creating a redundant narrow IV only used by the + // loop test. + LargestType = getBackedgeIVType(L); + } if (!LargestType || SE->getTypeSizeInBits(Ty) > + SE->getTypeSizeInBits(LargestType)) + LargestType = SE->getEffectiveSCEVType(Ty); + } + if (!DisableIVRewrite) { + for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + NeedCannIV = true; + const Type *Ty = + SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); + if (!LargestType || + SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) - LargestType = Ty; - NeedCannIV = true; + LargestType = Ty; + } } // Now that we know the largest of the induction variable expressions @@ -614,19 +1114,17 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. ICmpInst *NewICmp = 0; - if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && - !BackedgeTakenCount->isZero() && - ExitingBlock) { + if (ExpandBECount) { + assert(canExpandBackedgeTakenCount(L, SE) && + "canonical IV disrupted BackedgeTaken expansion"); assert(NeedCannIV && "LinearFunctionTestReplace requires a canonical induction variable"); - // Can't rewrite non-branch yet. - if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) - NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, - ExitingBlock, BI, Rewriter); + NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, + Rewriter); } - // Rewrite IV-derived expressions. - RewriteIVExpressions(L, Rewriter); + if (!DisableIVRewrite) + RewriteIVExpressions(L, Rewriter); // Clear the rewriter cache, because values that are in the rewriter's cache // can be deleted in the loop below, causing the AssertingVH in the cache to @@ -649,7 +1147,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // For completeness, inform IVUsers of the IV use in the newly-created // loop exit test instruction. if (NewICmp) - IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0))); + IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)), + IndVar); // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader()); @@ -1080,5 +1579,5 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { } // Add a new IVUsers entry for the newly-created integer PHI. 
-  IU->AddUsersIfInteresting(NewPHI);
+  IU->AddUsersIfInteresting(NewPHI, NewPHI);
 }
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 7168177..cf18ff0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -706,7 +706,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
     DEBUG(dbgs() << "  In block '" << BB->getName()
           << "' folding terminator: " << *BB->getTerminator() << '\n');
     ++NumFolds;
-    ConstantFoldTerminator(BB);
+    ConstantFoldTerminator(BB, true);
     return true;
   }
 
@@ -929,9 +929,10 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   if (UnavailablePred) {
     assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
            "Can't handle critical edge here!");
-    Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+    LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
                                  LI->getAlignment(),
                                  UnavailablePred->getTerminator());
+    NewVal->setDebugLoc(LI->getDebugLoc());
     AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
   }
 
@@ -944,6 +945,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
                                 LoadBB->begin());
   PN->takeName(LI);
+  PN->setDebugLoc(LI->getDebugLoc());
 
   // Insert new entries into the PHI for each predecessor.  A single block may
   // have multiple entries here.
@@ -1375,7 +1377,8 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
 
   // We didn't copy the terminator from BB over to NewBB, because there is now
   // an unconditional jump to SuccBB.  Insert the unconditional jump.
-  BranchInst::Create(SuccBB, NewBB);
+  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
+  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
 
   // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
   // PHI nodes for NewBB now.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 93de9cf..13bd022 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -372,7 +372,11 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
     return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
                                      LI->getMetadata(LLVMContext::MD_tbaa));
   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
-    // Handle obvious cases efficiently.
+    // Don't sink or hoist dbg info; it's legal, but not useful.
+    if (isa<DbgInfoIntrinsic>(I))
+      return false;
+
+    // Handle simple cases by querying alias analysis.
     AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
     if (Behavior == AliasAnalysis::DoesNotAccessMemory)
       return true;
@@ -445,8 +449,7 @@ void LICM::sink(Instruction &I) {
   // enough that we handle it as a special (more efficient) case.  It is more
   // efficient to handle because there are no PHI nodes that need to be placed.
   if (ExitBlocks.size() == 1) {
-    if (!isa<DbgInfoIntrinsic>(I) &&
-        !DT->dominates(I.getParent(), ExitBlocks[0])) {
+    if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
       // Instruction is not used, just delete it.
       CurAST->deleteValue(&I);
       // If I has users in unreachable blocks, eliminate.
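A pattern repeated throughout the JumpThreading, GVN, and LICM hunks: when a pass re-materializes an instruction, it now copies the original's debug location onto the replacement so -g line information survives the transform. A minimal sketch of the idiom (2.9-era include paths assumed; makeThreadedLoad is a hypothetical helper, not part of the patch):

    #include "llvm/Instructions.h"
    #include "llvm/BasicBlock.h"
    using namespace llvm;

    // Re-load the same pointer at the end of a predecessor block while
    // preserving the original load's name, alignment, and debug location.
    static LoadInst *makeThreadedLoad(LoadInst *LI, BasicBlock *Pred) {
      LoadInst *NewLI = new LoadInst(LI->getPointerOperand(),
                                     LI->getName() + ".pr",
                                     /*isVolatile=*/false,
                                     LI->getAlignment(),
                                     Pred->getTerminator());
      // Without this, the new load carries no source line at all and the
      // debugger experience degrades after the pass runs.
      NewLI->setDebugLoc(LI->getDebugLoc());
      return NewLI;
    }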
@@ -602,13 +605,15 @@ namespace { SmallPtrSet<Value*, 4> &PointerMustAliases; SmallVectorImpl<BasicBlock*> &LoopExitBlocks; AliasSetTracker &AST; + DebugLoc DL; public: LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, - SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast) - : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), - LoopExitBlocks(LEB), AST(ast) {} + SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, + DebugLoc dl) + : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP), + PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { @@ -629,7 +634,8 @@ namespace { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); Instruction *InsertPos = ExitBlock->getFirstNonPHI(); - new StoreInst(LiveInValue, SomePtr, InsertPos); + StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); + NewSI->setDebugLoc(DL); } } @@ -727,6 +733,12 @@ void LICM::PromoteAliasSet(AliasSet &AS) { Changed = true; ++NumPromoted; + // Grab a debug location for the inserted loads/stores; given that the + // inserted loads/stores have little relation to the original loads/stores, + // this code just arbitrarily picks a location from one, since any debug + // location is better than none. + DebugLoc DL = LoopUses[0]->getDebugLoc(); + SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); @@ -734,13 +746,14 @@ void LICM::PromoteAliasSet(AliasSet &AS) { SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST); + *CurAST, DL); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. LoadInst *PreheaderLoad = new LoadInst(SomePtr, SomePtr->getName()+".promoted", Preheader->getTerminator()); + PreheaderLoad->setDebugLoc(DL); SSA.AddAvailableValue(Preheader, PreheaderLoad); // Rewrite all the loads in the loop and remember all the definitions from diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 1366231..dbf6eec 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -128,11 +128,11 @@ INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } -/// DeleteDeadInstruction - Delete this instruction. Before we do, go through +/// deleteDeadInstruction - Delete this instruction. Before we do, go through /// and zero out all the operands of this instruction. If any of them become /// dead, delete them and the computation tree that feeds them. /// -static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { +static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -162,6 +162,14 @@ static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { } while (!NowDeadInsts.empty()); } +/// deleteIfDeadInstruction - If the specified value is a dead instruction, +/// delete it and any recursively used instructions. 
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (isInstructionTriviallyDead(I)) + deleteDeadInstruction(I, SE); +} + bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; @@ -454,31 +462,35 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, return false; } - - // Okay, we have a strided store "p[i]" of a splattable value. We can turn - // this into a memset in the loop preheader now if we want. However, this - // would be unsafe to do if there is anything else in the loop that may read - // or write to the aliased location. Check for an alias. - if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef, - CurLoop, BECount, - StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) - return false; - - // Okay, everything looks good, insert the memset. - BasicBlock *Preheader = CurLoop->getLoopPreheader(); - - IRBuilder<> Builder(Preheader->getTerminator()); - // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the - // header. Just insert code for it in the preheader. + // header. This allows us to insert code for it in the preheader. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE); - + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn + // this into a memset in the loop preheader now if we want. However, this + // would be unsafe to do if there is anything else in the loop that may read + // or write to the aliased location. Check for any overlap by generating the + // base pointer and checking the region. unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); + + if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, + CurLoop, BECount, + StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){ + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(BasePtr, *SE); + return false; + } + + // Okay, everything looks good, insert the memset. + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); @@ -521,7 +533,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - DeleteDeadInstruction(TheStore, *SE); + deleteDeadInstruction(TheStore, *SE); ++NumMemSet; return true; } @@ -539,41 +551,51 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); + // The trip count of the loop and the base pointer of the addrec SCEV is + // guaranteed to be loop invariant, which means that it should dominate the + // header. This allows us to insert code for it in the preheader. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); + SCEVExpander Expander(*SE); + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read - // or write to the stored location (including the load feeding the stores). - // Check for an alias. 
-  if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
+  // or write the memory region we're storing to.  This includes the load that
+  // feeds the stores.  Check for an alias by generating the base address and
+  // checking everything.
+  Value *StoreBasePtr =
+    Expander.expandCodeFor(StoreEv->getStart(),
+                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+                           Preheader->getTerminator());
+
+  if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
                             CurLoop, BECount, StoreSize,
-                            getAnalysis<AliasAnalysis>(), SI))
+                            getAnalysis<AliasAnalysis>(), SI)) {
+    Expander.clear();
+    // If we generated new code for the base pointer, clean up.
+    deleteIfDeadInstruction(StoreBasePtr, *SE);
     return false;
+  }
 
   // For a memcpy, we have to make sure that the input array is not being
   // mutated by the loop.
-  if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod,
-                            CurLoop, BECount, StoreSize,
-                            getAnalysis<AliasAnalysis>(), SI))
-    return false;
-
-  // Okay, everything looks good, insert the memcpy.
-  BasicBlock *Preheader = CurLoop->getLoopPreheader();
-
-  IRBuilder<> Builder(Preheader->getTerminator());
-
-  // The trip count of the loop and the base pointer of the addrec SCEV is
-  // guaranteed to be loop invariant, which means that it should dominate the
-  // header.  Just insert code for it in the preheader.
-  SCEVExpander Expander(*SE);
-
   Value *LoadBasePtr =
     Expander.expandCodeFor(LoadEv->getStart(),
                            Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
                            Preheader->getTerminator());
-  Value *StoreBasePtr =
-    Expander.expandCodeFor(StoreEv->getStart(),
-                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
-                           Preheader->getTerminator());
+
+  if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
+                            StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
+    Expander.clear();
+    // If we generated new code for the base pointer, clean up.
+    deleteIfDeadInstruction(LoadBasePtr, *SE);
+    deleteIfDeadInstruction(StoreBasePtr, *SE);
+    return false;
+  }
+
+  // Okay, everything is safe, we can transform this!
+
   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
   // pointer size if it isn't already.
@@ -589,18 +611,19 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
   Value *NumBytes =
     Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
 
-  Value *NewCall =
+  CallInst *NewCall =
     Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
                          std::min(SI->getAlignment(), LI->getAlignment()));
+  NewCall->setDebugLoc(SI->getDebugLoc());
 
   DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
                << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
                << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");
-  (void)NewCall;
+
 
   // Okay, the memcpy has been formed.  Zap the original store and anything that
   // feeds into it.
-  DeleteDeadInstruction(SI, *SE);
+  deleteDeadInstruction(SI, *SE);
   ++NumMemCpy;
   return true;
 }
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 5abc790..73ebd61 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -209,7 +209,12 @@ struct Formula {
   /// when AM.Scale is not zero.
   const SCEV *ScaledReg;
 
-  Formula() : ScaledReg(0) {}
+  /// UnfoldedOffset - An additional constant offset which is added near the
+  /// use.  This requires a temporary register, but the offset itself can
+  /// live in an add immediate field rather than a register.
+ int64_t UnfoldedOffset; + + Formula() : ScaledReg(0), UnfoldedOffset(0) {} void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); @@ -379,6 +384,10 @@ void Formula::print(raw_ostream &OS) const { OS << "<unknown>"; OS << ')'; } + if (UnfoldedOffset != 0) { + if (!First) OS << " + "; else First = false; + OS << "imm(" << UnfoldedOffset << ')'; + } } void Formula::dump() const { @@ -771,8 +780,10 @@ void Cost::RateFormula(const Formula &F, RatePrimaryRegister(BaseReg, Regs, L, SE, DT); } - if (F.BaseRegs.size() > 1) - NumBaseAdds += F.BaseRegs.size() - 1; + // Determine how many (unfolded) adds we'll need inside the loop. + size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0); + if (NumBaseParts > 1) + NumBaseAdds += NumBaseParts - 1; // Tally up the non-zero immediates. for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), @@ -1793,7 +1804,8 @@ LSRInstance::OptimizeLoopTermCond() { ExitingBlock->getInstList().insert(TermBr, Cond); // Clone the IVUse, as the old use still exists! - CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); + CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace(), + CondUse->getPhi()); TermBr->replaceUsesOfWith(OldCond, Cond); } } @@ -1945,7 +1957,8 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, if (F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg && F.AM.BaseGV == OrigF.AM.BaseGV && - F.AM.Scale == OrigF.AM.Scale) { + F.AM.Scale == OrigF.AM.Scale && + F.UnfoldedOffset == OrigF.UnfoldedOffset) { if (F.AM.BaseOffs == 0) return &LU; // This is the formula where all the registers and symbols matched; @@ -2061,6 +2074,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); if (SE.isLoopInvariant(N, L)) { + // S is normalized, so normalize N before folding it into S + // to keep the result normalized. + N = TransformForPostIncUse(Normalize, N, CI, 0, + LF.PostIncLoops, SE, DT); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); } @@ -2313,8 +2330,29 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, if (InnerSum->isZero()) continue; Formula F = Base; - F.BaseRegs[i] = InnerSum; - F.BaseRegs.push_back(*J); + + // Add the remaining pieces of the add back into the new formula. + const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); + if (TLI && InnerSumSC && + SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && + TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue())) { + F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue(); + F.BaseRegs.erase(F.BaseRegs.begin() + i); + } else + F.BaseRegs[i] = InnerSum; + + // Add J as its own register, or an unfolded immediate. + const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); + if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue())) + F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue(); + else + F.BaseRegs.push_back(*J); + if (InsertFormula(LU, LUIdx, F)) // If that formula hadn't been seen before, recurse to find more like // it. @@ -2482,6 +2520,15 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, continue; } + // Check that multiplying with the unfolded offset doesn't overflow. 
+      if (F.UnfoldedOffset != 0) {
+        if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
+          continue;
+        F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
+        if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
+          continue;
+      }
+
       // If we make it here and it's legal, add it.
       (void)InsertFormula(LU, LUIdx, F);
     next:;
@@ -2664,7 +2711,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
     // other orig regs.
     ImmMapTy::const_iterator OtherImms[] = {
       Imms.begin(), prior(Imms.end()),
-      Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+      Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
     };
     for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
       ImmMapTy::const_iterator M = OtherImms[i];
@@ -2738,8 +2785,13 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
         Formula NewF = F;
         NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
         if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
-                        LU.Kind, LU.AccessTy, TLI))
-          continue;
+                        LU.Kind, LU.AccessTy, TLI)) {
+          if (!TLI ||
+              !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+            continue;
+          NewF = F;
+          NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
+        }
         NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
 
         // If the new formula has a constant in a register, and adding the
@@ -3488,6 +3540,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
     }
   }
 
+  // Expand the unfolded offset portion.
+  int64_t UnfoldedOffset = F.UnfoldedOffset;
+  if (UnfoldedOffset != 0) {
+    // Just add the immediate values.
+    Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
+                                                       UnfoldedOffset)));
+  }
+
   // Emit instructions summing all the operands.
   const SCEV *FullS = Ops.empty() ?
                       SE.getConstant(IntTy, 0) :
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index b4e3d31..e05f29c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -258,6 +258,7 @@ bool LoopUnswitch::processCurrentLoop() {
       if (LoopCond && SI->getNumCases() > 1) {
         // Find a value to unswitch on:
         // FIXME: this should choose the most expensive case!
+        // FIXME: scan for a case with a non-critical edge?
         Constant *UnswitchVal = SI->getCaseValue(1);
         // Do not process same value again and again.
         if (!UnswitchedVals.insert(UnswitchVal))
@@ -560,6 +561,8 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
     BasicBlock *ExitBlock = ExitBlocks[i];
     SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
                                        pred_end(ExitBlock));
+    // Although SplitBlockPredecessors doesn't preserve loop-simplify in
+    // general, if we call it on all predecessors of all exits then it does.
     SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
                            ".us-lcssa", this);
   }
@@ -786,8 +789,13 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
   // If this is the edge to the header block for a loop, remove the loop and
   // promote all subloops.
   if (Loop *BBLoop = LI->getLoopFor(BB)) {
-    if (BBLoop->getLoopLatch() == BB)
+    if (BBLoop->getLoopLatch() == BB) {
       RemoveLoopFromHierarchy(BBLoop);
+      if (currentLoop == BBLoop) {
+        currentLoop = 0;
+        redoLoop = false;
+      }
+    }
   }
 
   // Remove the block from the loop info, which removes it from any loops it
@@ -859,7 +867,6 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
   // FOLD boolean conditions (X|LIC), (X&LIC).  Fold conditional branches,
   // selects, switches.
- std::vector<User*> Users(LIC->use_begin(), LIC->use_end()); std::vector<Instruction*> Worklist; LLVMContext &Context = Val->getContext(); @@ -875,13 +882,14 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), !cast<ConstantInt>(Val)->getZExtValue()); - for (unsigned i = 0, e = Users.size(); i != e; ++i) - if (Instruction *U = cast<Instruction>(Users[i])) { - if (!L->contains(U)) - continue; - U->replaceUsesOfWith(LIC, Replacement); - Worklist.push_back(U); - } + for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end(); + UI != E; ++UI) { + Instruction *U = dyn_cast<Instruction>(*UI); + if (!U || !L->contains(U)) + continue; + U->replaceUsesOfWith(LIC, Replacement); + Worklist.push_back(U); + } SimplifyCode(Worklist, L); return; } @@ -889,9 +897,10 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // Otherwise, we don't know the precise value of LIC, but we do know that it // is certainly NOT "Val". As such, simplify any uses in the loop that we // can. This case occurs when we unswitch switch statements. - for (unsigned i = 0, e = Users.size(); i != e; ++i) { - Instruction *U = cast<Instruction>(Users[i]); - if (!L->contains(U)) + for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end(); + UI != E; ++UI) { + Instruction *U = dyn_cast<Instruction>(*UI); + if (!U || !L->contains(U)) continue; Worklist.push_back(U); @@ -909,13 +918,22 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // Found a dead case value. Don't remove PHI nodes in the // successor if they become single-entry, those PHI nodes may // be in the Users list. - + + BasicBlock *Switch = SI->getParent(); + BasicBlock *SISucc = SI->getSuccessor(DeadCase); + BasicBlock *Latch = L->getLoopLatch(); + if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. + // If the DeadCase successor dominates the loop latch, then the + // transformation isn't safe since it will delete the sole predecessor edge + // to the latch. + if (Latch && DT->dominates(SISucc, Latch)) + continue; + // FIXME: This is a hack. We need to keep the successor around // and hooked up so as to preserve the loop structure, because // trying to update it is complicated. So instead we preserve the // loop structure and put the block on a dead code path. - BasicBlock *Switch = SI->getParent(); - SplitEdge(Switch, SI->getSuccessor(DeadCase), this); + SplitEdge(Switch, SISucc, this); // Compute the successors instead of relying on the return value // of SplitEdge, since it may have split the switch successor // after PHI nodes. 
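The dead-case handling above declines to fold away a switch edge whose successor dominates the loop latch: deleting that edge would remove the latch's sole predecessor and destroy the loop. Restated as a standalone predicate, a minimal sketch assuming the 2.9-era LLVM API used in the patch (the helper name is ours):

  #include "llvm/Analysis/Dominators.h"
  #include "llvm/Analysis/LoopInfo.h"
  using namespace llvm;

  // Hypothetical helper (not part of the patch): true if putting DeadSucc on
  // a dead path would sever the only remaining route to L's latch.
  static bool foldWouldBreakLoop(BasicBlock *DeadSucc, Loop *L,
                                 DominatorTree *DT) {
    BasicBlock *Latch = L->getLoopLatch();
    return Latch && DT->dominates(DeadSucc, Latch);
  }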
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a3035cb..be5aa2e 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/IRBuilder.h" @@ -459,7 +460,10 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); - + + if (!Range.TheStores.empty()) + AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); + // Zap all the stores. for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), @@ -484,11 +488,28 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // a memcpy. if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { if (!LI->isVolatile() && LI->hasOneUse()) { - MemDepResult dep = MD->getDependency(LI); + MemDepResult ldep = MD->getDependency(LI); CallInst *C = 0; - if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst())) - C = dyn_cast<CallInst>(dep.getInst()); - + if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) + C = dyn_cast<CallInst>(ldep.getInst()); + + if (C) { + // Check that nothing touches the dest of the "copy" between + // the call and the store. + MemDepResult sdep = MD->getDependency(SI); + if (!sdep.isNonLocal()) { + bool FoundCall = false; + for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) { + if (&*I == C) { + FoundCall = true; + break; + } + } + if (!FoundCall) + C = 0; + } + } + if (C) { bool changed = performCallSlotOptzn(LI, SI->getPointerOperand()->stripPointerCasts(), @@ -863,12 +884,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) return false; - // Get the alignment of the byval. If it is greater than the memcpy, then we - // can't do the substitution. If the call doesn't specify the alignment, then - // it is some target specific value that we can't know. + // Get the alignment of the byval. If the call doesn't specify the alignment, + // then it is some target specific value that we can't know. unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); - if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign) - return false; + if (ByValAlign == 0) return false; + + // If it is greater than the memcpy, then we check to see if we can force the + // source of the memcpy to the alignment we need. If we fail, we bail out. + if (MDep->getAlignment() < ByValAlign && + getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign) + return false; // Verify that the copied-from memory doesn't change in between the memcpy and // the byval call. diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index db8eb85..083412e 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -655,7 +655,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { // Just mark all destinations executable! // TODO: This could be improved if the operand is a [cast of a] BlockAddress. 
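// (An indirectbr transfers control through a runtime address, so unless the
// operand can be resolved to a [cast of a] BlockAddress, isEdgeFeasible must
// treat every listed destination as reachable; the +/- pair below only
// switches to the reference form of isa<>.)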
- if (isa<IndirectBrInst>(&TI)) + if (isa<IndirectBrInst>(TI)) return true; #ifndef NDEBUG diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 8178c27..8938b28 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -30,6 +30,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" @@ -341,7 +342,8 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is // compatible with it. - if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) { + if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && + (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) { if (!VectorTy) { VectorTy = VectorType::get(In, AllocaSize/EltSize); return; @@ -741,8 +743,9 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) { + unsigned FromTypeSize = TD.getTypeAllocSize(FromType); unsigned ToTypeSize = TD.getTypeAllocSize(ToType); - if (ToTypeSize == AllocaSize) { + if (FromTypeSize == ToTypeSize) { // If the two types have the same primitive size, use a bit cast. // Otherwise, it is two vectors with the same element type that has // the same allocation size but different number of elements so use @@ -754,13 +757,13 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, return CreateShuffleVectorCast(FromVal, ToType, Builder); } - if (isPowerOf2_64(AllocaSize / ToTypeSize)) { + if (isPowerOf2_64(FromTypeSize / ToTypeSize)) { assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " "of a smaller vector type at a nonzero offset."); const Type *CastElementTy = getScaledElementType(FromType, ToType, ToTypeSize * 8); - unsigned NumCastVectorElements = AllocaSize / ToTypeSize; + unsigned NumCastVectorElements = FromTypeSize / ToTypeSize; LLVMContext &Context = FromVal->getContext(); const Type *CastTy = VectorType::get(CastElementTy, @@ -1051,8 +1054,9 @@ namespace { class AllocaPromoter : public LoadAndStorePromoter { AllocaInst *AI; public: - AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S) - : LoadAndStorePromoter(Insts, S), AI(0) {} + AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, + DbgDeclareInst *DD, DIBuilder *&DB) + : LoadAndStorePromoter(Insts, S, DD, DB), AI(0) {} void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) { // Remember which alloca we're promoting (for isInstInList). 
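In the ConvertScalar_ExtractValue hunk above, the choice between a straight bitcast/shuffle and the wider-element extraction path now keys off the size of the value being converted rather than the alloca as a whole. A rough standalone sketch of that dispatch, using the TargetData calls the file already relies on (the helper name is ours):

  #include "llvm/DerivedTypes.h"
  #include "llvm/Target/TargetData.h"
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  // Hypothetical helper (not part of the patch): can a ToTy value be carved
  // out of a vector FromTy by casting alone?
  static bool extractFitsByCast(const VectorType *FromTy, const Type *ToTy,
                                const TargetData &TD) {
    unsigned FromSize = TD.getTypeAllocSize(FromTy);
    unsigned ToSize = TD.getTypeAllocSize(ToTy);
    if (FromSize == ToSize)
      return true;                           // bitcast or same-size shuffle
    return isPowerOf2_64(FromSize / ToSize); // reinterpret + element extract
  }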
@@ -1329,7 +1333,6 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { return true; } - bool SROA::performPromotion(Function &F) { std::vector<AllocaInst*> Allocas; DominatorTree *DT = 0; @@ -1340,6 +1343,7 @@ bool SROA::performPromotion(Function &F) { bool Changed = false; SmallVector<Instruction*, 64> Insts; + DIBuilder *DIB = 0; while (1) { Allocas.clear(); @@ -1363,8 +1367,11 @@ bool SROA::performPromotion(Function &F) { for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) Insts.push_back(cast<Instruction>(*UI)); - - AllocaPromoter(Insts, SSA).run(AI, Insts); + + DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); + if (DDI && !DIB) + DIB = new DIBuilder(*AI->getParent()->getParent()->getParent()); + AllocaPromoter(Insts, SSA, DDI, DIB).run(AI, Insts); Insts.clear(); } } @@ -1372,6 +1379,10 @@ bool SROA::performPromotion(Function &F) { Changed = true; } + // FIXME: Is there a better way to handle the lazy initialization of DIB + // so that there doesn't need to be an explicit delete? + delete DIB; + return Changed; } @@ -1831,9 +1842,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 // (Also works for arrays instead of structs) Value *Insert = UndefValue::get(LIType); + IRBuilder<> Builder(LI); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Load = new LoadInst(NewElts[i], "load", LI); - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); + Value *Load = Builder.CreateLoad(NewElts[i], "load"); + Insert = Builder.CreateInsertValue(Insert, Load, i, "insert"); } LI->replaceAllUsesWith(Insert); DeadInsts.push_back(LI); @@ -1858,9 +1870,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, // %val.1 = extractvalue { i32, i32 } %val, 1 // store i32 %val.1, i32* %alloc.1 // (Also works for arrays instead of structs) + IRBuilder<> Builder(SI); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); - new StoreInst(Extract, NewElts[i], SI); + Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName()); + Builder.CreateStore(Extract, NewElts[i]); } DeadInsts.push_back(SI); } else if (SIType->isIntegerTy() && @@ -2481,19 +2494,22 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, } if (CallSite CS = U) { - // If this is a readonly/readnone call site, then we know it is just a - // load and we can ignore it. - if (CS.onlyReadsMemory()) - continue; - // If this is the function being called then we treat it like a load and // ignore it. if (CS.isCallee(UI)) continue; + // If this is a readonly/readnone call site, then we know it is just a + // load (but one that potentially returns the value itself), so we can + // ignore it if we know that the value isn't captured. + unsigned ArgNo = CS.getArgumentNo(UI); + if (CS.onlyReadsMemory() && + (CS.getInstruction()->use_empty() || + CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))) + continue; + // If this is being passed as a byval argument, the caller is making a // copy, so it is only a read of the alloca. 
-      unsigned ArgNo = CS.getArgumentNo(UI);
       if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
         continue;
     }
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 1137c2b..7e9cc80 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -96,6 +96,7 @@ static void ChangeToCall(InvokeInst *II) {
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
+  NewCall->setDebugLoc(II->getDebugLoc());
   II->replaceAllUsesWith(NewCall);

   // Follow the call by a branch to the normal destination.
@@ -163,7 +164,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
       Changed = true;
     }

-    Changed |= ConstantFoldTerminator(BB);
+    Changed |= ConstantFoldTerminator(BB, true);
     for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
       Worklist.push_back(*SI);
   } while (!Worklist.empty());
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 539cc6f..e21eb9d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -59,6 +59,7 @@
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/InlineCost.h"
@@ -209,10 +210,10 @@ bool TailCallElim::runOnFunction(Function &F) {
     }
   }

-  // Finally, if this function contains no non-escaping allocas, mark all calls
-  // in the function as eligible for tail calls (there is no stack memory for
-  // them to access).
-  if (!FunctionContainsEscapingAllocas)
+  // Finally, if this function contains no non-escaping allocas and does not
+  // call setjmp, mark all calls in the function as eligible for tail calls
+  // (there is no stack memory for them to access).
+  if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
     for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
         if (CallInst *CI = dyn_cast<CallInst>(I)) {
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index c705cc5..92464e8 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -542,11 +542,9 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
 /// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a
 /// given basic block.
 DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) {
-  for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end();
-       BI != BE; ++BI) {
-    DebugLoc DL = BI->getDebugLoc();
-    if (!DL.isUnknown())
-      return DL;
-  }
+  if (const Instruction *I = BB->getFirstNonPHI())
+    return I->getDebugLoc();
+  // Scanning the entire block may be too expensive if the first instruction
+  // does not have valid location info.
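  // (Consequently, a block whose first non-PHI instruction carries no
  // DebugLoc now yields that unknown location directly instead of a valid
  // one from later in the block.)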
return DebugLoc(); } diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index caf2aeb..d6206a3 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -180,7 +180,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch. - BranchInst::Create(DestBB, NewBB); + BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); + NewBI->setDebugLoc(TI->getDebugLoc()); // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 4a90751..14bb17f 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -362,12 +362,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); const FunctionType *FT = Callee->getFunctionType(); - BasicBlock *BB = CI->getParent(); LLVMContext &Context = CI->getParent()->getContext(); - IRBuilder<> B(Context); - - // Set the builder to the instruction after the call. - B.SetInsertPoint(BB, CI); + IRBuilder<> B(CI); if (Name == "__memcpy_chk") { // Check if this has the right signature. diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 7d17909..8416170 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -10,6 +10,13 @@ // This file implements inlining of a function into a call site, resolving // parameters and the return value as appropriate. // +// The code in this file for handling inlines through invoke +// instructions preserves semantics only under some assumptions about +// the behavior of unwinders which correspond to gcc-style libUnwind +// exception personality functions. Eventually the IR will be +// improved to make this unnecessary, but until then, this code is +// marked [LIBUNWIND]. +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" @@ -28,6 +35,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/IRBuilder.h" using namespace llvm; bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) { @@ -37,6 +45,372 @@ bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) { return InlineFunction(CallSite(II), IFI); } +/// [LIBUNWIND] Look for an llvm.eh.exception call in the given block. +static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) { + for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) { + EHExceptionInst *exn = dyn_cast<EHExceptionInst>(i); + if (exn) return exn; + } + + return 0; +} + +/// [LIBUNWIND] Look for the 'best' llvm.eh.selector instruction for +/// the given llvm.eh.exception call. 
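/// "Best" here means: an eh.selector in the same block as the exception call
/// is accepted immediately; otherwise the first selector found in any other
/// block is used as a fallback.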
+static EHSelectorInst *findSelectorForException(EHExceptionInst *exn) {
+  BasicBlock *exnBlock = exn->getParent();
+
+  EHSelectorInst *outOfBlockSelector = 0;
+  for (Instruction::use_iterator
+         ui = exn->use_begin(), ue = exn->use_end(); ui != ue; ++ui) {
+    EHSelectorInst *sel = dyn_cast<EHSelectorInst>(*ui);
+    if (!sel) continue;
+
+    // Immediately accept an eh.selector in the same block as the
+    // exception call.
+    if (sel->getParent() == exnBlock) return sel;
+
+    // Otherwise, use the first selector we see.
+    if (!outOfBlockSelector) outOfBlockSelector = sel;
+  }
+
+  return outOfBlockSelector;
+}
+
+/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector
+/// in the given landing pad. In principle, llvm.eh.exception is
+/// required to be in the landing pad; in practice, SplitCriticalEdge
+/// can break that invariant, and then inlining can break it further.
+/// There's a real need for a reliable solution here, but until that
+/// happens, we make do with some fragile workarounds.
+static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) {
+  // Look for an exception call in the actual landing pad.
+  EHExceptionInst *exn = findExceptionInBlock(lpad);
+  if (exn) return findSelectorForException(exn);
+
+  // Okay, if that failed, look for one in an obvious successor. If
+  // we find one, we'll fix the IR by moving things back to the
+  // landing pad.
+
+  bool dominates = true; // does the lpad dominate the exn call
+  BasicBlock *nonDominated = 0; // if not, the first non-dominated block
+  BasicBlock *lastDominated = 0; // and the block which branched to it
+
+  BasicBlock *exnBlock = lpad;
+
+  // We need to protect against lpads that lead into infinite loops.
+  SmallPtrSet<BasicBlock*,4> visited;
+  visited.insert(exnBlock);
+
+  do {
+    // We're not going to apply this hack to anything more complicated
+    // than a series of unconditional branches, so if the block
+    // doesn't terminate in an unconditional branch, just fail. More
+    // complicated cases can arise when, say, sinking a call into a
+    // split unwind edge and then inlining it; but that can do almost
+    // *anything* to the CFG, including leaving the selector
+    // completely unreachable. The only way to fix that properly is
+    // to (1) prohibit transforms which move the exception or selector
+    // values away from the landing pad, e.g. by producing them with
+    // instructions that are pinned to an edge like a phi, or
+    // producing them with not-really-instructions, and (2) making
+    // transforms which split edges deal with that.
+    BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back());
+    if (!branch || branch->isConditional()) return 0;
+
+    BasicBlock *successor = branch->getSuccessor(0);
+
+    // Fail if we found an infinite loop.
+    if (!visited.insert(successor)) return 0;
+
+    // If the successor isn't dominated by exnBlock:
+    if (!successor->getSinglePredecessor()) {
+      // We don't want to have to deal with threading the exception
+      // through multiple levels of phi, so give up if we've already
+      // followed a non-dominating edge.
+      if (!dominates) return 0;
+
+      // Otherwise, remember this as a non-dominating edge.
+      dominates = false;
+      nonDominated = successor;
+      lastDominated = exnBlock;
+    }
+
+    exnBlock = successor;
+
+    // Can we stop here?
+    exn = findExceptionInBlock(exnBlock);
+  } while (!exn);
+
+  // Look for a selector call for the exception we found.
+ EHSelectorInst *selector = findSelectorForException(exn); + if (!selector) return 0; + + // The easy case is when the landing pad still dominates the + // exception call, in which case we can just move both calls back to + // the landing pad. + if (dominates) { + selector->moveBefore(lpad->getFirstNonPHI()); + exn->moveBefore(selector); + return selector; + } + + // Otherwise, we have to split at the first non-dominating block. + // The CFG looks basically like this: + // lpad: + // phis_0 + // insnsAndBranches_1 + // br label %nonDominated + // nonDominated: + // phis_2 + // insns_3 + // %exn = call i8* @llvm.eh.exception() + // insnsAndBranches_4 + // %selector = call @llvm.eh.selector(i8* %exn, ... + // We need to turn this into: + // lpad: + // phis_0 + // %exn0 = call i8* @llvm.eh.exception() + // %selector0 = call @llvm.eh.selector(i8* %exn0, ... + // insnsAndBranches_1 + // br label %split // from lastDominated + // nonDominated: + // phis_2 (without edge from lastDominated) + // %exn1 = call i8* @llvm.eh.exception() + // %selector1 = call i8* @llvm.eh.selector(i8* %exn1, ... + // br label %split + // split: + // phis_2 (edge from lastDominated, edge from split) + // %exn = phi ... + // %selector = phi ... + // insns_3 + // insnsAndBranches_4 + + assert(nonDominated); + assert(lastDominated); + + // First, make clones of the intrinsics to go in lpad. + EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone()); + EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone()); + lpadSelector->setArgOperand(0, lpadExn); + lpadSelector->insertBefore(lpad->getFirstNonPHI()); + lpadExn->insertBefore(lpadSelector); + + // Split the non-dominated block. + BasicBlock *split = + nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(), + nonDominated->getName() + ".lpad-fix"); + + // Redirect the last dominated branch there. + cast<BranchInst>(lastDominated->back()).setSuccessor(0, split); + + // Move the existing intrinsics to the end of the old block. + selector->moveBefore(&nonDominated->back()); + exn->moveBefore(selector); + + Instruction *splitIP = &split->front(); + + // For all the phis in nonDominated, make a new phi in split to join + // that phi with the edge from lastDominated. + for (BasicBlock::iterator + i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) { + PHINode *phi = dyn_cast<PHINode>(i); + if (!phi) break; + + PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(), + splitIP); + phi->replaceAllUsesWith(splitPhi); + splitPhi->addIncoming(phi, nonDominated); + splitPhi->addIncoming(phi->removeIncomingValue(lastDominated), + lastDominated); + } + + // Make new phis for the exception and selector. + PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP); + exn->replaceAllUsesWith(exnPhi); + selector->setArgOperand(0, exn); // except for this use + exnPhi->addIncoming(exn, nonDominated); + exnPhi->addIncoming(lpadExn, lastDominated); + + PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP); + selector->replaceAllUsesWith(selectorPhi); + selectorPhi->addIncoming(selector, nonDominated); + selectorPhi->addIncoming(lpadSelector, lastDominated); + + return lpadSelector; +} + +namespace { + /// A class for recording information about inlining through an invoke. 
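  /// It records the invoke's original ("outer") unwind destination, lazily
  /// locates that landing pad's eh.selector, can materialize an "inner"
  /// landing pad for forwarded llvm.eh.resume calls, and remembers the PHI
  /// values the original invoke edge supplied so that new unwind edges can
  /// be given the same incoming values.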
+ class InvokeInliningInfo { + BasicBlock *OuterUnwindDest; + EHSelectorInst *OuterSelector; + BasicBlock *InnerUnwindDest; + PHINode *InnerExceptionPHI; + PHINode *InnerSelectorPHI; + SmallVector<Value*, 8> UnwindDestPHIValues; + + public: + InvokeInliningInfo(InvokeInst *II) : + OuterUnwindDest(II->getUnwindDest()), OuterSelector(0), + InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0) { + + // If there are PHI nodes in the unwind destination block, we + // need to keep track of which values came into them from the + // invoke before removing the edge from this block. + llvm::BasicBlock *invokeBB = II->getParent(); + for (BasicBlock::iterator I = OuterUnwindDest->begin(); + isa<PHINode>(I); ++I) { + // Save the value to use for this edge. + PHINode *phi = cast<PHINode>(I); + UnwindDestPHIValues.push_back(phi->getIncomingValueForBlock(invokeBB)); + } + } + + /// The outer unwind destination is the target of unwind edges + /// introduced for calls within the inlined function. + BasicBlock *getOuterUnwindDest() const { + return OuterUnwindDest; + } + + EHSelectorInst *getOuterSelector() { + if (!OuterSelector) + OuterSelector = findSelectorForLandingPad(OuterUnwindDest); + return OuterSelector; + } + + BasicBlock *getInnerUnwindDest(); + + bool forwardEHResume(CallInst *call, BasicBlock *src); + + /// Add incoming-PHI values to the unwind destination block for + /// the given basic block, using the values for the original + /// invoke's source block. + void addIncomingPHIValuesFor(BasicBlock *BB) const { + addIncomingPHIValuesForInto(BB, OuterUnwindDest); + } + + void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { + BasicBlock::iterator I = dest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *phi = cast<PHINode>(I); + phi->addIncoming(UnwindDestPHIValues[i], src); + } + } + }; +} + +/// Get or create a target for the branch out of rewritten calls to +/// llvm.eh.resume. +BasicBlock *InvokeInliningInfo::getInnerUnwindDest() { + if (InnerUnwindDest) return InnerUnwindDest; + + // Find and hoist the llvm.eh.exception and llvm.eh.selector calls + // in the outer landing pad to immediately following the phis. + EHSelectorInst *selector = getOuterSelector(); + if (!selector) return 0; + + // The call to llvm.eh.exception *must* be in the landing pad. + Instruction *exn = cast<Instruction>(selector->getArgOperand(0)); + assert(exn->getParent() == OuterUnwindDest); + + // TODO: recognize when we've already done this, so that we don't + // get a linear number of these when inlining calls into lots of + // invokes with the same landing pad. + + // Do the hoisting. + Instruction *splitPoint = exn->getParent()->getFirstNonPHI(); + assert(splitPoint != selector && "selector-on-exception dominance broken!"); + if (splitPoint == exn) { + selector->removeFromParent(); + selector->insertAfter(exn); + splitPoint = selector->getNextNode(); + } else { + exn->moveBefore(splitPoint); + selector->moveBefore(splitPoint); + } + + // Split the landing pad. + InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint, + OuterUnwindDest->getName() + ".body"); + + // The number of incoming edges we expect to the inner landing pad. + const unsigned phiCapacity = 2; + + // Create corresponding new phis for all the phis in the outer landing pad. 
+ BasicBlock::iterator insertPoint = InnerUnwindDest->begin(); + BasicBlock::iterator I = OuterUnwindDest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *outerPhi = cast<PHINode>(I); + PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity, + outerPhi->getName() + ".lpad-body", + insertPoint); + outerPhi->replaceAllUsesWith(innerPhi); + innerPhi->addIncoming(outerPhi, OuterUnwindDest); + } + + // Create a phi for the exception value... + InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity, + "exn.lpad-body", insertPoint); + exn->replaceAllUsesWith(InnerExceptionPHI); + selector->setArgOperand(0, exn); // restore this use + InnerExceptionPHI->addIncoming(exn, OuterUnwindDest); + + // ...and the selector. + InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity, + "selector.lpad-body", insertPoint); + selector->replaceAllUsesWith(InnerSelectorPHI); + InnerSelectorPHI->addIncoming(selector, OuterUnwindDest); + + // All done. + return InnerUnwindDest; +} + +/// [LIBUNWIND] Try to forward the given call, which logically occurs +/// at the end of the given block, as a branch to the inner unwind +/// block. Returns true if the call was forwarded. +bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) { + // First, check whether this is a call to the intrinsic. + Function *fn = dyn_cast<Function>(call->getCalledValue()); + if (!fn || fn->getName() != "llvm.eh.resume") + return false; + + // At this point, we need to return true on all paths, because + // otherwise we'll construct an invoke of the intrinsic, which is + // not well-formed. + + // Try to find or make an inner unwind dest, which will fail if we + // can't find a selector call for the outer unwind dest. + BasicBlock *dest = getInnerUnwindDest(); + bool hasSelector = (dest != 0); + + // If we failed, just use the outer unwind dest, dropping the + // exception and selector on the floor. + if (!hasSelector) + dest = OuterUnwindDest; + + // Make a branch. + BranchInst::Create(dest, src); + + // Update the phis in the destination. They were inserted in an + // order which makes this work. + addIncomingPHIValuesForInto(src, dest); + + if (hasSelector) { + InnerExceptionPHI->addIncoming(call->getArgOperand(0), src); + InnerSelectorPHI->addIncoming(call->getArgOperand(1), src); + } + + return true; +} + +/// [LIBUNWIND] Check whether this selector is "only cleanups": +/// call i32 @llvm.eh.selector(blah, blah, i32 0) +static bool isCleanupOnlySelector(EHSelectorInst *selector) { + if (selector->getNumArgOperands() != 3) return false; + ConstantInt *val = dyn_cast<ConstantInt>(selector->getArgOperand(2)); + return (val && val->isZero()); +} /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into /// an invoke, we have to turn all of the calls that can throw into @@ -44,9 +418,9 @@ bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) { /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. /// -static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, - BasicBlock *InvokeDest, - const SmallVectorImpl<Value*> &InvokeDestPHIValues) { +/// Returns true to indicate that the next block should be skipped. 
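/// (The skip is needed after forwardEHResume fires: the block split off
/// after the resume call becomes unreachable, and re-scanning it would loop
/// forever, as the code below notes.)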
+static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, + InvokeInliningInfo &Invoke) { for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *I = BBI++; @@ -54,6 +428,38 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // instructions require no special handling. CallInst *CI = dyn_cast<CallInst>(I); if (CI == 0) continue; + + // LIBUNWIND: merge selector instructions. + if (EHSelectorInst *Inner = dyn_cast<EHSelectorInst>(CI)) { + EHSelectorInst *Outer = Invoke.getOuterSelector(); + if (!Outer) continue; + + bool innerIsOnlyCleanup = isCleanupOnlySelector(Inner); + bool outerIsOnlyCleanup = isCleanupOnlySelector(Outer); + + // If both selectors contain only cleanups, we don't need to do + // anything. TODO: this is really just a very specific instance + // of a much more general optimization. + if (innerIsOnlyCleanup && outerIsOnlyCleanup) continue; + + // Otherwise, we just append the outer selector to the inner selector. + SmallVector<Value*, 16> NewSelector; + for (unsigned i = 0, e = Inner->getNumArgOperands(); i != e; ++i) + NewSelector.push_back(Inner->getArgOperand(i)); + for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i) + NewSelector.push_back(Outer->getArgOperand(i)); + + CallInst *NewInner = CallInst::Create(Inner->getCalledValue(), + NewSelector.begin(), + NewSelector.end(), + "", + Inner); + // No need to copy attributes, calling convention, etc. + NewInner->takeName(Inner); + Inner->replaceAllUsesWith(NewInner); + Inner->eraseFromParent(); + continue; + } // If this call cannot unwind, don't convert it to an invoke. if (CI->doesNotThrow()) @@ -62,37 +468,45 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Convert this function call into an invoke instruction. // First, split the basic block. BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); - - // Next, create the new invoke instruction, inserting it at the end - // of the old basic block. + + // Delete the unconditional branch inserted by splitBasicBlock + BB->getInstList().pop_back(); + + // LIBUNWIND: If this is a call to @llvm.eh.resume, just branch + // directly to the new landing pad. + if (Invoke.forwardEHResume(CI, BB)) { + // TODO: 'Split' is now unreachable; clean it up. + + // We want to leave the original call intact so that the call + // graph and other structures won't get misled. We also have to + // avoid processing the next block, or we'll iterate here forever. + return true; + } + + // Otherwise, create the new invoke instruction. ImmutableCallSite CS(CI); SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end()); InvokeInst *II = - InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest, + InvokeInst::Create(CI->getCalledValue(), Split, + Invoke.getOuterUnwindDest(), InvokeArgs.begin(), InvokeArgs.end(), - CI->getName(), BB->getTerminator()); + CI->getName(), BB); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); // Make sure that anything using the call now uses the invoke! This also // updates the CallGraph if present, because it uses a WeakVH. CI->replaceAllUsesWith(II); - - // Delete the unconditional branch inserted by splitBasicBlock - BB->getInstList().pop_back(); + Split->getInstList().pop_front(); // Delete the original call - + // Update any PHI nodes in the exceptional block to indicate that // there is now a new entry in them. 
- unsigned i = 0; - for (BasicBlock::iterator I = InvokeDest->begin(); - isa<PHINode>(I); ++I, ++i) - cast<PHINode>(I)->addIncoming(InvokeDestPHIValues[i], BB); - - // This basic block is now complete, the caller will continue scanning the - // next one. - return; + Invoke.addIncomingPHIValuesFor(BB); + return false; } + + return false; } @@ -106,17 +520,6 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, ClonedCodeInfo &InlinedCodeInfo) { BasicBlock *InvokeDest = II->getUnwindDest(); - SmallVector<Value*, 8> InvokeDestPHIValues; - - // If there are PHI nodes in the unwind destination block, we need to - // keep track of which values came into them from this invoke, then remove - // the entry for this block. - BasicBlock *InvokeBlock = II->getParent(); - for (BasicBlock::iterator I = InvokeDest->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - // Save the value to use for this edge. - InvokeDestPHIValues.push_back(PN->getIncomingValueForBlock(InvokeBlock)); - } Function *Caller = FirstNewBlock->getParent(); @@ -132,11 +535,17 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InvokeDest->removePredecessor(II->getParent()); return; } + + InvokeInliningInfo Invoke(II); for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) - HandleCallsInBlockInlinedThroughInvoke(BB, InvokeDest, - InvokeDestPHIValues); + if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { + // Honor a request to skip the next block. We don't need to + // consider UnwindInsts in this case either. + ++BB; + continue; + } if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { // An UnwindInst requires special handling when it gets inlined into an @@ -150,12 +559,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // Update any PHI nodes in the exceptional block to indicate that // there is now a new entry in them. - unsigned i = 0; - for (BasicBlock::iterator I = InvokeDest->begin(); - isa<PHINode>(I); ++I, ++i) { - PHINode *PN = cast<PHINode>(I); - PN->addIncoming(InvokeDestPHIValues[i], BB); - } + Invoke.addIncomingPHIValuesFor(BB); } } @@ -299,21 +703,48 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, ConstantInt::get(Type::getInt32Ty(Context), 1), ConstantInt::getFalse(Context) // isVolatile }; - CallInst *TheMemCpy = - CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); - - // If we have a call graph, update it. - if (CallGraph *CG = IFI.CG) { - CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); - CallGraphNode *CallerNode = (*CG)[Caller]; - CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); - } + CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); // Uses of the argument in the function should use our new alloca // instead. return NewAlloca; } +// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime +// intrinsic. +static bool isUsedByLifetimeMarker(Value *V) { + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; + ++UI) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; + } + } + } + return false; +} + +// hasLifetimeMarkers - Check whether the given alloca already has +// lifetime.start or lifetime.end intrinsics. 
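// Only the alloca itself and its direct bitcasts to i8* are inspected;
// markers reachable only through GEPs or longer cast chains are not found,
// and such an alloca is simply treated as having no markers.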
+static bool hasLifetimeMarkers(AllocaInst *AI) { + const Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext()); + if (AI->getType() == Int8PtrTy) + return isUsedByLifetimeMarker(AI); + + // Do a scan to find all the bitcasts to i8*. + for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E; + ++I) { + if (I->getType() != Int8PtrTy) continue; + if (!isa<BitCastInst>(*I)) continue; + if (isUsedByLifetimeMarker(*I)) + return true; + } + return false; +} + // InlineFunction - This function inlines the called function into the basic // block of the caller. This returns false if it is not possible to inline this // call. The program is still in a well defined state if this occurs though. @@ -460,6 +891,26 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { } } + // Leave lifetime markers for the static alloca's, scoping them to the + // function we just inlined. + if (!IFI.StaticAllocas.empty()) { + IRBuilder<> builder(FirstNewBlock->begin()); + for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { + AllocaInst *AI = IFI.StaticAllocas[ai]; + + // If the alloca is already scoped to something smaller than the whole + // function then there's no need to add redundant, less accurate markers. + if (hasLifetimeMarkers(AI)) + continue; + + builder.CreateLifetimeStart(AI); + for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { + IRBuilder<> builder(Returns[ri]); + builder.CreateLifetimeEnd(AI); + } + } + } + // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { @@ -468,25 +919,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); - // If we are preserving the callgraph, add edges to the stacksave/restore - // functions for the calls we insert. - CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; - if (CallGraph *CG = IFI.CG) { - StackSaveCGN = CG->getOrInsertFunction(StackSave); - StackRestoreCGN = CG->getOrInsertFunction(StackRestore); - CallerNode = (*CG)[Caller]; - } - // Insert the llvm.stacksave. CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack", FirstNewBlock->begin()); - if (IFI.CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { - CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]); - if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); + CallInst::Create(StackRestore, SavedPtr, "", Returns[i]); } // Count the number of StackRestore calls we insert. 
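The inliner change above brackets every inlined static alloca with lifetime intrinsics, scoped from the start of the inlined body to each return. As a standalone illustration of the emission pattern, a minimal sketch assuming the same 2.9-era IRBuilder API (the helper and its parameters are ours):

  #include "llvm/Instructions.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Support/IRBuilder.h"
  using namespace llvm;

  // Hypothetical helper (not part of the patch): mark AI live from BodyBegin
  // and dead again before each exit of the region.
  static void scopeAllocaLifetime(AllocaInst *AI, Instruction *BodyBegin,
                                  const SmallVectorImpl<ReturnInst*> &Exits) {
    IRBuilder<> Start(BodyBegin);
    Start.CreateLifetimeStart(AI);
    for (unsigned i = 0, e = Exits.size(); i != e; ++i) {
      IRBuilder<> End(Exits[i]);
      End.CreateLifetimeEnd(AI);
    }
  }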
@@ -498,8 +938,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI); - if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); + CallInst::Create(StackRestore, SavedPtr, "", UI); ++NumStackRestores; } } diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 4bca2fc..3bdbaa5 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -20,6 +20,7 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" #include "llvm/Operator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -34,6 +35,7 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" @@ -43,12 +45,16 @@ using namespace llvm; // Local constant propagation. // -// ConstantFoldTerminator - If a terminator instruction is predicated on a -// constant value, convert it into an unconditional branch to the constant -// destination. -// -bool llvm::ConstantFoldTerminator(BasicBlock *BB) { +/// ConstantFoldTerminator - If a terminator instruction is predicated on a +/// constant value, convert it into an unconditional branch to the constant +/// destination. This is a nontrivial operation because the successors of this +/// basic block must have their PHI nodes updated. +/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch +/// conditions and indirectbr addresses this might make dead if +/// DeleteDeadConditions is true. +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { TerminatorInst *T = BB->getTerminator(); + IRBuilder<> Builder(T); // Branch - See if we are conditional jumping on constant if (BranchInst *BI = dyn_cast<BranchInst>(T)) { @@ -71,7 +77,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { OldDest->removePredecessor(BB); // Replace the conditional branch with an unconditional one. - BranchInst::Create(Destination, BI); + Builder.CreateBr(Destination); BI->eraseFromParent(); return true; } @@ -86,8 +92,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { Dest1->removePredecessor(BI->getParent()); // Replace the conditional branch with an unconditional one. - BranchInst::Create(Dest1, BI); + Builder.CreateBr(Dest1); + Value *Cond = BI->getCondition(); BI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Cond); return true; } return false; @@ -136,7 +145,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { // now. if (TheOnlyDest) { // Insert the new branch. - BranchInst::Create(TheOnlyDest, SI); + Builder.CreateBr(TheOnlyDest); BasicBlock *BB = SI->getParent(); // Remove entries from PHI nodes which we no longer branch to... @@ -150,17 +159,21 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { } // Delete the old switch. 
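 // The switch's condition is saved before the terminator is erased so that,
 // when DeleteDeadConditions is set, RecursivelyDeleteTriviallyDeadInstructions
 // can clean up a comparison left dead by the fold.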
- BB->getInstList().erase(SI); + Value *Cond = SI->getCondition(); + SI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Cond); return true; } if (SI->getNumSuccessors() == 2) { // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. - Value *Cond = new ICmpInst(SI, ICmpInst::ICMP_EQ, SI->getCondition(), - SI->getSuccessorValue(1), "cond"); + Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), + SI->getSuccessorValue(1), "cond"); + // Insert the new branch. - BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI); + Builder.CreateCondBr(Cond, SI->getSuccessor(1), SI->getSuccessor(0)); // Delete the old switch. SI->eraseFromParent(); @@ -175,7 +188,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) { BasicBlock *TheOnlyDest = BA->getBasicBlock(); // Insert the new branch. - BranchInst::Create(TheOnlyDest, IBI); + Builder.CreateBr(TheOnlyDest); for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { if (IBI->getDestination(i) == TheOnlyDest) @@ -183,7 +196,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { else IBI->getDestination(i)->removePredecessor(IBI->getParent()); } + Value *Address = IBI->getAddress(); IBI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Address); // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an @@ -785,10 +801,19 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (!DIVar.Verify()) return false; - Instruction *DbgVal = - Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, - DIVar, SI); - + Instruction *DbgVal = NULL; + // If an argument is zero extended then use argument directly. The ZExt + // may be zapped by an optimization pass in future. + Argument *ExtendedArg = NULL; + if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); + if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); + if (ExtendedArg) + DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI); + else + DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI); + // Propagate any debug metadata from the store onto the dbg.value. DebugLoc SIDL = SI->getDebugLoc(); if (!SIDL.isUnknown()) @@ -853,3 +878,15 @@ bool llvm::LowerDbgDeclare(Function &F) { } return true; } + +/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the +/// alloca 'V', if any. +DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { + if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) + for (Value::use_iterator UI = DebugNode->use_begin(), + E = DebugNode->use_end(); UI != E; ++UI) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + return DDI; + + return 0; +} diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 50c9ae2..a1736b9 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -100,18 +100,6 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { return true; } -/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the -/// alloca 'V', if any. 
-static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) { - if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) - for (Value::use_iterator UI = DebugNode->use_begin(), - E = DebugNode->use_end(); UI != E; ++UI) - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) - return DDI; - - return 0; -} - namespace { struct AllocaInfo; diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 2860c3e..b336194 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -14,7 +14,9 @@ #define DEBUG_TYPE "ssaupdater" #include "llvm/Constants.h" #include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" @@ -22,6 +24,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" @@ -355,7 +358,8 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { LoadAndStorePromoter:: LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts, - SSAUpdater &S, StringRef BaseName) : SSA(S) { + SSAUpdater &S, DbgDeclareInst *DD, DIBuilder *DB, + StringRef BaseName) : SSA(S), DDI(DD), DIB(DB) { if (Insts.empty()) return; Value *SomeVal; @@ -402,9 +406,11 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // single user in it, we can rewrite it trivially. if (BlockUses.size() == 1) { // If it is a store, it is a trivial def of the value in the block. - if (StoreInst *SI = dyn_cast<StoreInst>(User)) + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + if (DDI) + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); SSA.AddAvailableValue(BB, SI->getOperand(0)); - else + } else // Otherwise it is a load, queue it to rewrite as a live-in load. LiveInLoads.push_back(cast<LoadInst>(User)); BlockUses.clear(); @@ -453,12 +459,15 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { continue; } - if (StoreInst *S = dyn_cast<StoreInst>(II)) { + if (StoreInst *SI = dyn_cast<StoreInst>(II)) { // If this is a store to an unrelated pointer, ignore it. - if (!isInstInList(S, Insts)) continue; - + if (!isInstInList(SI, Insts)) continue; + + if (DDI) + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + // Remember that this is the active value in the block. 
- StoredValue = S->getOperand(0); + StoredValue = SI->getOperand(0); } } @@ -513,4 +522,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { instructionDeleted(User); User->eraseFromParent(); } + + if (DDI) + DDI->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 18b8573..6df846c 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -20,6 +20,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" @@ -31,6 +32,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/NoFolder.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <set> @@ -55,16 +58,18 @@ class SimplifyCFGOpt { BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases); bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, - BasicBlock *Pred); - bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI); + BasicBlock *Pred, + IRBuilder<> &Builder); + bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI, + IRBuilder<> &Builder); - bool SimplifyReturn(ReturnInst *RI); - bool SimplifyUnwind(UnwindInst *UI); + bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); + bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder); bool SimplifyUnreachable(UnreachableInst *UI); - bool SimplifySwitch(SwitchInst *SI); + bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); bool SimplifyIndirectBr(IndirectBrInst *IBI); - bool SimplifyUncondBranch(BranchInst *BI); - bool SimplifyCondBranch(BranchInst *BI); + bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder); + bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {} @@ -541,7 +546,8 @@ ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1, /// form of jump threading. bool SimplifyCFGOpt:: SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, - BasicBlock *Pred) { + BasicBlock *Pred, + IRBuilder<> &Builder) { Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); if (!PredVal) return false; // Not a value comparison in predecessor. @@ -574,7 +580,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // uncond br. assert(ThisCases.size() == 1 && "Branch can only have one case!"); // Insert the new branch. - Instruction *NI = BranchInst::Create(ThisDef, TI); + Instruction *NI = Builder.CreateBr(ThisDef); (void) NI; // Remove PHI node entries for the dead edge. @@ -639,7 +645,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, CheckEdge = 0; // Insert the new branch. - Instruction *NI = BranchInst::Create(TheRealDest, TI); + Instruction *NI = Builder.CreateBr(TheRealDest); (void) NI; DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() @@ -674,7 +680,8 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) { /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons /// on the same value. 
If so, and if safe to do so, fold them together. -bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { +bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, + IRBuilder<> &Builder) { BasicBlock *BB = TI->getParent(); Value *CV = isValueEqualityComparison(TI); // CondVal assert(CV && "Not a comparison?"); @@ -767,16 +774,18 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i) AddPredecessorToBlock(NewSuccessors[i], Pred, BB); + Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without TargetData"); - CV = new PtrToIntInst(CV, TD->getIntPtrType(CV->getContext()), - "magicptr", PTI); + CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), + "magicptr"); } // Now that the successors are updated, create the new Switch instruction. - SwitchInst *NewSI = SwitchInst::Create(CV, PredDefault, - PredCases.size(), PTI); + SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, + PredCases.size()); + NewSI->setDebugLoc(PTI->getDebugLoc()); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) NewSI->addCase(PredCases[i].first, PredCases[i].second); @@ -900,6 +909,7 @@ HoistTerminator: NT->takeName(I1); } + IRBuilder<true, NoFolder> Builder(NT); // Hoisting one of the terminators from our successor is a great thing. // Unfortunately, the successors of the if/else blocks may have PHI nodes in // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI @@ -916,9 +926,11 @@ HoistTerminator: // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) - SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V, - BB1V->getName()+"."+BB2V->getName(), NT); + if (SI == 0) + SI = cast<SelectInst> + (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, + BB1V->getName()+"."+BB2V->getName())); + // Make the PHI node use the select for all incoming values for BB1/BB2 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) @@ -1076,13 +1088,16 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // Create a select whose true value is the speculatively executed value and // false value is the previously determined FalseV. + IRBuilder<true, NoFolder> Builder(BI); SelectInst *SI; if (Invert) - SI = SelectInst::Create(BrCond, FalseV, HInst, - FalseV->getName() + "." + HInst->getName(), BI); + SI = cast<SelectInst> + (Builder.CreateSelect(BrCond, FalseV, HInst, + FalseV->getName() + "." + HInst->getName())); else - SI = SelectInst::Create(BrCond, HInst, FalseV, - HInst->getName() + "." + FalseV->getName(), BI); + SI = cast<SelectInst> + (Builder.CreateSelect(BrCond, HInst, FalseV, + HInst->getName() + "." + FalseV->getName())); // Make the PHI node use the select for all incoming values for "then" and // "if" blocks. @@ -1156,6 +1171,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); if (RealDest == BB) continue; // Skip self loops. + // Skip if the predecessor's terminator is an indirect branch. + if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; // The dest block might have PHI nodes, other predecessors and other // difficult cases. 
@@ -1156,6 +1171,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
     BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
 
     if (RealDest == BB) continue;  // Skip self loops.
+    // Skip if the predecessor's terminator is an indirect branch.
+    if (isa<IndirectBrInst>(PredBB->getTerminator())) continue;
 
     // The dest block might have PHI nodes, other predecessors and other
     // difficult cases.  Instead of being smart about this, just insert a new
@@ -1211,7 +1228,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
       BB->removePredecessor(PredBB);
       PredBBTI->setSuccessor(i, EdgeBB);
     }
-    
+
     // Recurse, simplifying any other constants.
     return FoldCondBranchOnPHI(BI, TD) | true;
   }
@@ -1320,6 +1337,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
   // If we can still promote the PHI nodes after this gauntlet of tests,
   // do all of the PHI's now.
   Instruction *InsertPt = DomBlock->getTerminator();
+  IRBuilder<true, NoFolder> Builder(InsertPt);
 
   // Move all 'aggressive' instructions, which are defined in the
   // conditional parts of the if's up to the dominating block.
@@ -1337,7 +1355,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
     Value *TrueVal  = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
     Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
 
-    Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", InsertPt);
+    SelectInst *NV =
+      cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
     PN->replaceAllUsesWith(NV);
     NV->takeName(PN);
     PN->eraseFromParent();
@@ -1347,7 +1366,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
   // has been flattened.  Change DomBlock to jump directly to our new block to
   // avoid other simplifycfg's kicking in on the diamond.
   TerminatorInst *OldTI = DomBlock->getTerminator();
-  BranchInst::Create(BB, OldTI);
+  Builder.SetInsertPoint(OldTI);
+  Builder.CreateBr(BB);
   OldTI->eraseFromParent();
   return true;
 }
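For reference, the select conversion that FoldTwoEntryPHINode performs, distilled into a standalone sketch (foldTwoEntryPHI is a hypothetical helper; the real function runs a gauntlet of legality checks first). The boolean-index trick mirrors the hunk above: if incoming block 0 is IfFalse, then incoming value 1 is the value from the true arm, and vice versa.

#include "llvm/Instructions.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/NoFolder.h"
using namespace llvm;

// Hypothetical helper, for illustration only.
static void foldTwoEntryPHI(PHINode *PN, Value *IfCond, BasicBlock *DomBlock,
                            BasicBlock *IfTrue, BasicBlock *IfFalse) {
  IRBuilder<true, NoFolder> Builder(DomBlock->getTerminator());
  // (block 0 == IfFalse) is 0 or 1, picking the value from the other arm.
  Value *TrueVal  = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
  Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
  Value *NV = Builder.CreateSelect(IfCond, TrueVal, FalseVal);
  PN->replaceAllUsesWith(NV);
  NV->takeName(PN);
  PN->eraseFromParent();
  // Before: %x = phi i32 [ %a, %iftrue ], [ %b, %iffalse ]
  // After:  %x = select i1 %cond, i32 %a, i32 %b   (in DomBlock)
}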
@@ -1355,7 +1375,8 @@
 /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
 /// to two returning blocks, try to merge them together into one return,
 /// introducing a select if the return values disagree.
-static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
+                                           IRBuilder<> &Builder) {
   assert(BI->isConditional() && "Must be a conditional branch");
   BasicBlock *TrueSucc = BI->getSuccessor(0);
   BasicBlock *FalseSucc = BI->getSuccessor(1);
@@ -1370,13 +1391,14 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
   if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
     return false;
 
+  Builder.SetInsertPoint(BI);
   // Okay, we found a branch that is going to two return nodes.  If
   // there is no return value for this function, just change the
   // branch into a return.
   if (FalseRet->getNumOperands() == 0) {
     TrueSucc->removePredecessor(BI->getParent());
     FalseSucc->removePredecessor(BI->getParent());
-    ReturnInst::Create(BI->getContext(), 0, BI);
+    Builder.CreateRetVoid();
     EraseTerminatorInstAndDCECond(BI);
     return true;
   }
@@ -1419,14 +1441,14 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
     } else if (isa<UndefValue>(TrueValue)) {
       TrueValue = FalseValue;
     } else {
-      TrueValue = SelectInst::Create(BrCond, TrueValue,
-                                     FalseValue, "retval", BI);
+      TrueValue = Builder.CreateSelect(BrCond, TrueValue,
+                                       FalseValue, "retval");
     }
   }
 
-  Value *RI = !TrueValue ?
-    ReturnInst::Create(BI->getContext(), BI) :
-    ReturnInst::Create(BI->getContext(), TrueValue, BI);
+  Value *RI = !TrueValue ?
+    Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
+
+  (void) RI;
 
   DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
@@ -1443,6 +1465,7 @@
 /// the predecessor and use logical operations to pick the right destination.
 bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
   BasicBlock *BB = BI->getParent();
+
   Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
   if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
       Cond->getParent() != BB || !Cond->hasOneUse())
@@ -1563,7 +1586,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
   }
 
   DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
-  
+  IRBuilder<> Builder(PBI);
+
   // If we need to invert the condition in the pred block to match, do so now.
   if (InvertPredCond) {
     Value *NewCond = PBI->getCondition();
@@ -1572,8 +1596,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
       CmpInst *CI = cast<CmpInst>(NewCond);
       CI->setPredicate(CI->getInversePredicate());
     } else {
-      NewCond = BinaryOperator::CreateNot(NewCond,
-                                          PBI->getCondition()->getName()+".not", PBI);
+      NewCond = Builder.CreateNot(NewCond,
+                                  PBI->getCondition()->getName()+".not");
     }
 
     PBI->setCondition(NewCond);
@@ -1600,8 +1624,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
     New->takeName(Cond);
     Cond->setName(New->getName()+".old");
 
-    Value *NewCond = BinaryOperator::Create(Opc, PBI->getCondition(),
-                                            New, "or.cond", PBI);
+    Instruction *NewCond =
+      cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(),
+                                            New, "or.cond"));
     PBI->setCondition(NewCond);
     if (PBI->getSuccessor(0) == BB) {
       AddPredecessorToBlock(TrueDest, PredBlock, BB);
@@ -1744,23 +1769,22 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   }
 
   DEBUG(dbgs() << *PBI->getParent()->getParent());
-  
+
   // BI may have other predecessors.  Because of this, we leave
   // it alone, but modify PBI.
 
   // Make sure we get to CommonDest on True&True directions.
   Value *PBICond = PBI->getCondition();
+  IRBuilder<true, NoFolder> Builder(PBI);
   if (PBIOp)
-    PBICond = BinaryOperator::CreateNot(PBICond,
-                                        PBICond->getName()+".not",
-                                        PBI);
+    PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not");
+
   Value *BICond = BI->getCondition();
   if (BIOp)
-    BICond = BinaryOperator::CreateNot(BICond,
-                                       BICond->getName()+".not",
-                                       PBI);
+    BICond = Builder.CreateNot(BICond, BICond->getName()+".not");
+
   // Merge the conditions.
-  Value *Cond = BinaryOperator::CreateOr(PBICond, BICond, "brmerge", PBI);
+  Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
 
   // Modify PBI to branch on the new condition to the new dests.
   PBI->setCondition(Cond);
@@ -1783,8 +1807,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
       Value *PBIV = PN->getIncomingValue(PBBIdx);
       if (BIV != PBIV) {
         // Insert a select in PBI to pick the right value.
-        Value *NV = SelectInst::Create(PBICond, PBIV, BIV,
-                                       PBIV->getName()+".mux", PBI);
+        Value *NV = cast<SelectInst>
          (Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux"));
         PN->setIncomingValue(PBBIdx, NV);
       }
     }
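The condition merging in SimplifyCondBranchToCondBranch, distilled: normalize both branch conditions so that "true" leads to the common destination, then OR them. mergeBranchConditions is a hypothetical helper; PBIOp and BIOp stand in for the inversion flags the real code computes earlier, and the caller installs the result via PBI->setCondition().

#include "llvm/Instructions.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/NoFolder.h"
using namespace llvm;

// Hypothetical helper, for illustration only.
static Value *mergeBranchConditions(BranchInst *PBI, BranchInst *BI,
                                    bool PBIOp, bool BIOp) {
  IRBuilder<true, NoFolder> Builder(PBI);
  Value *PBICond = PBI->getCondition();
  if (PBIOp)  // Invert so the 'true' direction reaches the common dest.
    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
  // "br %p, A, B" followed by "br %q, A, C" becomes a branch on %p | %q.
  return Builder.CreateOr(PBICond, BICond, "brmerge");
}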
@@ -1823,16 +1847,19 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
       Succ->removePredecessor(OldTerm->getParent());
   }
 
+  IRBuilder<> Builder(OldTerm);
+  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
+
   // Insert an appropriate new terminator.
   if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) {
     if (TrueBB == FalseBB)
       // We were only looking for one successor, and it was present.
       // Create an unconditional branch to it.
-      BranchInst::Create(TrueBB, OldTerm);
+      Builder.CreateBr(TrueBB);
     else
       // We found both of the successors we were looking for.
       // Create a conditional branch sharing the condition of the select.
-      BranchInst::Create(TrueBB, FalseBB, Cond, OldTerm);
+      Builder.CreateCondBr(Cond, TrueBB, FalseBB);
   } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
     // Neither of the selected blocks were successors, so this
     // terminator must be unreachable.
@@ -1843,10 +1870,10 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
     // the edge to the one that wasn't must be unreachable.
     if (KeepEdge1 == 0)
       // Only TrueBB was found.
-      BranchInst::Create(TrueBB, OldTerm);
+      Builder.CreateBr(TrueBB);
     else
       // Only FalseBB was found.
-      BranchInst::Create(FalseBB, OldTerm);
+      Builder.CreateBr(FalseBB);
   }
 
   EraseTerminatorInstAndDCECond(OldTerm);
@@ -1911,8 +1938,10 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
 /// We prefer to split the edge to 'end' so that there is a true/false entry to
 /// the PHI, merging the third icmp into the switch.
 static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
-                                                  const TargetData *TD) {
+                                                  const TargetData *TD,
+                                                  IRBuilder<> &Builder) {
   BasicBlock *BB = ICI->getParent();
+
   // If the block has any PHIs in it or the icmp has multiple uses, it is too
   // complex.
   if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false;
@@ -1990,7 +2019,9 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
   SI->addCase(Cst, NewBB);
 
   // NewBB branches to the phi block, add the uncond branch and the phi entry.
-  BranchInst::Create(SuccBlock, NewBB);
+  Builder.SetInsertPoint(NewBB);
+  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
+  Builder.CreateBr(SuccBlock);
   PHIUse->addIncoming(NewCst, NewBB);
   return true;
 }
@@ -1998,7 +2029,8 @@
 /// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
 /// Check to see if it is branching on an or/and chain of icmp instructions, and
 /// fold it into a switch instruction if so.
-static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD) {
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
+                                      IRBuilder<> &Builder) {
   Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
   if (Cond == 0) return false;
@@ -2054,11 +2086,12 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD) {
     BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
     // Remove the uncond branch added to the old block.
     TerminatorInst *OldTI = BB->getTerminator();
-    
+    Builder.SetInsertPoint(OldTI);
+
     if (TrueWhenEqual)
-      BranchInst::Create(EdgeBB, NewBB, ExtraCase, OldTI);
+      Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
     else
-      BranchInst::Create(NewBB, EdgeBB, ExtraCase, OldTI);
+      Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
 
     OldTI->eraseFromParent();
 
@@ -2070,18 +2103,19 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD) {
            << "\nEXTRABB = " << *BB);
     BB = NewBB;
   }
-  
+
+  Builder.SetInsertPoint(BI);
   // Convert pointer to int before we switch.
   if (CompVal->getType()->isPointerTy()) {
     assert(TD && "Cannot switch on pointer without TargetData");
-    CompVal = new PtrToIntInst(CompVal,
-                               TD->getIntPtrType(CompVal->getContext()),
-                               "magicptr", BI);
+    CompVal = Builder.CreatePtrToInt(CompVal,
+                                     TD->getIntPtrType(CompVal->getContext()),
+                                     "magicptr");
   }
 
   // Create the new switch instruction now.
-  SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB, Values.size(), BI);
-
+  SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+
   // Add all of the 'cases' to the switch instruction.
   for (unsigned i = 0, e = Values.size(); i != e; ++i)
     New->addCase(Values[i], EdgeBB);
@@ -2104,7 +2138,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD) {
   return true;
 }
 
-bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI) {
+bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
   BasicBlock *BB = RI->getParent();
   if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
@@ -2148,13 +2182,13 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI) {
     // Check to see if the non-BB successor is also a return block.
     if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
         isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
-        SimplifyCondBranchToTwoReturns(BI))
+        SimplifyCondBranchToTwoReturns(BI, Builder))
       return true;
   }
   return false;
 }
 
-bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI) {
+bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) {
   // Check to see if the first instruction in this block is just an unwind.
   // If so, replace any invoke instructions which use this as an exception
   // destination with call instructions.
@@ -2169,14 +2203,16 @@
     if (II && II->getUnwindDest() == BB) {
       // Insert a new branch instruction before the invoke, because this
       // is now a fall through.
-      BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+      Builder.SetInsertPoint(II);
+      BranchInst *BI = Builder.CreateBr(II->getNormalDest());
       Pred->getInstList().remove(II);   // Take out of symbol table
 
       // Insert the call now.
       SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
-      CallInst *CI = CallInst::Create(II->getCalledValue(),
-                                      Args.begin(), Args.end(),
-                                      II->getName(), BI);
+      Builder.SetInsertPoint(BI);
+      CallInst *CI = Builder.CreateCall(II->getCalledValue(),
+                                        Args.begin(), Args.end(),
+                                        II->getName());
      CI->setCallingConv(II->getCallingConv());
      CI->setAttributes(II->getAttributes());
      // If the invoke produced a value, the Call now does instead.
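Both SimplifyUnwind here and SimplifyUnreachable below demote an invoke whose unwind edge is useless into a plain call followed by a fall-through branch. A self-contained sketch of that rewrite against the 2.9-era API, where an invoke's last three operands are the callee and the two destinations, hence op_end()-3 for the argument list; demoteInvokeToCall is a hypothetical helper that omits the caller's surrounding bookkeeping.

#include "llvm/Instructions.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

// Hypothetical helper, for illustration only: assumes II's unwind edge is
// known to be dead.
static void demoteInvokeToCall(InvokeInst *II) {
  IRBuilder<> Builder(II);
  // Everything before the trailing (callee, normal dest, unwind dest)
  // operands is a call argument.
  SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
  CallInst *CI = Builder.CreateCall(II->getCalledValue(),
                                    Args.begin(), Args.end());
  CI->setCallingConv(II->getCallingConv());
  CI->setAttributes(II->getAttributes());
  CI->takeName(II);
  // The exceptional path is dead, so control always falls through.
  Builder.CreateBr(II->getNormalDest());
  II->replaceAllUsesWith(CI);
  II->getUnwindDest()->removePredecessor(II->getParent());
  II->eraseFromParent();
}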
@@ -2235,7 +2271,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
   SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
   for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
     TerminatorInst *TI = Preds[i]->getTerminator();
-    
+    IRBuilder<> Builder(TI);
     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
       if (BI->isUnconditional()) {
         if (BI->getSuccessor(0) == BB) {
@@ -2245,10 +2281,10 @@
         }
       } else {
         if (BI->getSuccessor(0) == BB) {
-          BranchInst::Create(BI->getSuccessor(1), BI);
+          Builder.CreateBr(BI->getSuccessor(1));
           EraseTerminatorInstAndDCECond(BI);
         } else if (BI->getSuccessor(1) == BB) {
-          BranchInst::Create(BI->getSuccessor(0), BI);
+          Builder.CreateBr(BI->getSuccessor(0));
           EraseTerminatorInstAndDCECond(BI);
           Changed = true;
         }
@@ -2312,14 +2348,15 @@
       if (II->getUnwindDest() == BB) {
         // Convert the invoke to a call instruction.  This would be a good
         // place to note that the call does not throw though.
-        BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+        BranchInst *BI = Builder.CreateBr(II->getNormalDest());
         II->removeFromParent();   // Take out of symbol table
 
         // Insert the call now...
         SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
-        CallInst *CI = CallInst::Create(II->getCalledValue(),
-                                        Args.begin(), Args.end(),
-                                        II->getName(), BI);
+        Builder.SetInsertPoint(BI);
+        CallInst *CI = Builder.CreateCall(II->getCalledValue(),
+                                          Args.begin(), Args.end(),
+                                          II->getName());
         CI->setCallingConv(II->getCallingConv());
         CI->setAttributes(II->getAttributes());
         // If the invoke produced a value, the call does now instead.
@@ -2343,7 +2380,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
 
 /// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a
 /// integer range comparison into a sub, an icmp and a branch.
-static bool TurnSwitchRangeIntoICmp(SwitchInst *SI) {
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
   assert(SI->getNumCases() > 2 && "Degenerate switch?");
 
   // Make sure all cases point to the same destination and gather the values.
@@ -2368,9 +2405,9 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI) {
 
   Value *Sub = SI->getCondition();
   if (!Offset->isNullValue())
-    Sub = BinaryOperator::CreateAdd(Sub, Offset, Sub->getName()+".off", SI);
-  Value *Cmp = new ICmpInst(SI, ICmpInst::ICMP_ULT, Sub, NumCases, "switch");
-  BranchInst::Create(SI->getSuccessor(1), SI->getDefaultDest(), Cmp, SI);
+    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
+  Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+  Builder.CreateCondBr(Cmp, SI->getSuccessor(1), SI->getDefaultDest());
 
   // Prune obsolete incoming values off the successor's PHI nodes.
   for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin();
@@ -2383,7 +2420,37 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI) {
   return true;
 }
 
-bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) {
+/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
+/// and use it to remove dead cases.
+static bool EliminateDeadSwitchCases(SwitchInst *SI) {
+  Value *Cond = SI->getCondition();
+  unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth();
+  APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
+  ComputeMaskedBits(Cond, APInt::getAllOnesValue(Bits), KnownZero, KnownOne);
+
+  // Gather dead cases.
+  SmallVector<ConstantInt*, 8> DeadCases;
+  for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) {
+    if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 ||
+        (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) {
+      DeadCases.push_back(SI->getCaseValue(I));
+      DEBUG(dbgs() << "SimplifyCFG: switch case '"
+                   << SI->getCaseValue(I)->getValue() << "' is dead.\n");
+    }
+  }
+
+  // Remove dead cases from the switch.
+  for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
+    unsigned Case = SI->findCaseValue(DeadCases[I]);
+    // Prune unused values from PHI nodes.
+    SI->getSuccessor(Case)->removePredecessor(SI->getParent());
+    SI->removeCase(Case);
+  }
+
+  return !DeadCases.empty();
+}
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   // If this switch is too complex to want to look at, ignore it.
   if (!isValueEqualityComparison(SI)) return false;
@@ -2393,7 +2460,7 @@
   // If we only have one predecessor, and if it is a branch on this value,
   // see if that predecessor totally determines the outcome of this switch.
   if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
-    if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
+    if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
       return SimplifyCFG(BB) | true;
 
   Value *Cond = SI->getCondition();
@@ -2408,13 +2475,17 @@
   while (isa<DbgInfoIntrinsic>(BBI))
     ++BBI;
   if (SI == &*BBI)
-    if (FoldValueComparisonIntoPredecessors(SI))
+    if (FoldValueComparisonIntoPredecessors(SI, Builder))
       return SimplifyCFG(BB) | true;
 
   // Try to transform the switch into an icmp and a branch.
-  if (TurnSwitchRangeIntoICmp(SI))
+  if (TurnSwitchRangeIntoICmp(SI, Builder))
     return SimplifyCFG(BB) | true;
-  
+
+  // Remove unreachable cases.
+  if (EliminateDeadSwitchCases(SI))
+    return SimplifyCFG(BB) | true;
+
   return false;
 }
 
@@ -2455,7 +2526,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
   return Changed;
 }
 
-bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI) {
+bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
   BasicBlock *BB = BI->getParent();
 
   // If the Terminator is the only non-phi instruction, simplify the block.
@@ -2470,7 +2541,8 @@
     if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
       for (++I; isa<DbgInfoIntrinsic>(I); ++I)
         ;
-      if (I->isTerminator() && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD))
+      if (I->isTerminator()
+          && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
         return true;
     }
 
@@ -2478,7 +2550,7 @@
 }
 
 
-bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI) {
+bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   BasicBlock *BB = BI->getParent();
 
   // Conditional branch
@@ -2487,7 +2559,7 @@
     // see if that predecessor totally determines the outcome of this
     // switch.
     if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
-      if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
+      if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
        return SimplifyCFG(BB) | true;
 
     // This block must be empty, except for the setcond inst, if it exists.
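The dead-case test introduced by EliminateDeadSwitchCases reads: a case constant is unreachable if it has a 1 in a bit the condition is known to keep zero, or a 0 in a bit known to be one. A tiny standalone restatement of the same predicate (caseIsDead is a hypothetical helper for illustration):

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Hypothetical helper restating the predicate above, for illustration only.
static bool caseIsDead(const APInt &CaseVal, const APInt &KnownZero,
                       const APInt &KnownOne) {
  return (CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne;
}

// With the condition's low bit known to be one (e.g. cond = x | 1):
//   caseIsDead(APInt(32, 2), APInt(32, 0), APInt(32, 1)) == true   (bit 0 clear)
//   caseIsDead(APInt(32, 3), APInt(32, 0), APInt(32, 1)) == false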
@@ -2497,20 +2569,20 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI) {
       while (isa<DbgInfoIntrinsic>(I))
         ++I;
       if (&*I == BI) {
-        if (FoldValueComparisonIntoPredecessors(BI))
+        if (FoldValueComparisonIntoPredecessors(BI, Builder))
           return SimplifyCFG(BB) | true;
       } else if (&*I == cast<Instruction>(BI->getCondition())){
         ++I;
         // Ignore dbg intrinsics.
         while (isa<DbgInfoIntrinsic>(I))
           ++I;
-        if (&*I == BI && FoldValueComparisonIntoPredecessors(BI))
+        if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
           return SimplifyCFG(BB) | true;
       }
     }
 
     // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
-    if (SimplifyBranchOnICmpChain(BI, TD))
+    if (SimplifyBranchOnICmpChain(BI, TD, Builder))
       return true;
 
     // We have a conditional branch to two blocks that are only reachable
@@ -2581,7 +2653,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
 
   // Check to see if we can constant propagate this terminator instruction
   // away...
-  Changed |= ConstantFoldTerminator(BB);
+  Changed |= ConstantFoldTerminator(BB, true);
 
   // Check for and eliminate duplicate PHI nodes in this block.
   Changed |= EliminateDuplicatePHINodes(BB);
@@ -2593,27 +2665,30 @@
   if (MergeBlockIntoPredecessor(BB))
     return true;
 
+  IRBuilder<> Builder(BB);
+
   // If there is a trivial two-entry PHI node in this basic block, and we can
   // eliminate it, do so now.
   if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
     if (PN->getNumIncomingValues() == 2)
       Changed |= FoldTwoEntryPHINode(PN, TD);
 
+  Builder.SetInsertPoint(BB->getTerminator());
   if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
     if (BI->isUnconditional()) {
-      if (SimplifyUncondBranch(BI)) return true;
+      if (SimplifyUncondBranch(BI, Builder)) return true;
     } else {
-      if (SimplifyCondBranch(BI)) return true;
+      if (SimplifyCondBranch(BI, Builder)) return true;
     }
   } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
-    if (SimplifyReturn(RI)) return true;
+    if (SimplifyReturn(RI, Builder)) return true;
   } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
-    if (SimplifySwitch(SI)) return true;
+    if (SimplifySwitch(SI, Builder)) return true;
   } else if (UnreachableInst *UI =
                dyn_cast<UnreachableInst>(BB->getTerminator())) {
     if (SimplifyUnreachable(UI)) return true;
   } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
-    if (SimplifyUnwind(UI)) return true;
+    if (SimplifyUnwind(UI, Builder)) return true;
   } else if (IndirectBrInst *IBI =
                dyn_cast<IndirectBrInst>(BB->getTerminator())) {
     if (SimplifyIndirectBr(IBI)) return true;
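Looking back at TurnSwitchRangeIntoICmp above: a switch whose cases are the contiguous values C .. C+N-1, all targeting one block, reduces to the unsigned range check (x - C) u< N, and the patch writes the subtraction as an add of -C. A sketch with the legality checks and PHI cleanup omitted; rangeSwitchToICmp is a hypothetical helper, Offset is assumed to hold the negated smallest case value, and Builder is assumed to already point at SI, as in the pass.

#include "llvm/Instructions.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

// Hypothetical helper, for illustration only.
static void rangeSwitchToICmp(SwitchInst *SI, Constant *Offset,
                              ConstantInt *NumCases, IRBuilder<> &Builder) {
  // switch (x) { case C..C+N-1: goto Body; default: goto Def; }
  //   ==>  if ((x + (-C)) u< N) goto Body; else goto Def;
  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())  // x - C, written as x + (-C).
    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
  Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
  Builder.CreateCondBr(Cmp, SI->getSuccessor(1), SI->getDefaultDest());
  SI->eraseFromParent();  // The conditional branch replaces the switch.
}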