diff options
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 32 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopUnswitch.cpp | 141 | ||||
-rw-r--r-- | lib/Transforms/Scalar/MemCpyOptimizer.cpp | 55 | ||||
-rw-r--r-- | lib/Transforms/Scalar/SCCP.cpp | 61 | ||||
-rw-r--r-- | lib/Transforms/Scalar/ScalarReplAggregates.cpp | 32 | ||||
-rw-r--r-- | lib/Transforms/Scalar/SimplifyLibCalls.cpp | 29 | ||||
-rw-r--r-- | lib/Transforms/Utils/BasicBlockUtils.cpp | 22 | ||||
-rw-r--r-- | lib/Transforms/Utils/BuildLibCalls.cpp | 43 | ||||
-rw-r--r-- | lib/Transforms/Utils/InlineFunction.cpp | 11 |
9 files changed, 240 insertions, 186 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 76c815d..e025b05 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -136,8 +136,14 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { return 0; // If not 1/2/4/8 bytes, exit. // Use an integer load+store unless we can find something better. - Type *NewPtrTy = - PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3)); + unsigned SrcAddrSp = + cast<PointerType>(MI->getOperand(2)->getType())->getAddressSpace(); + unsigned DstAddrSp = + cast<PointerType>(MI->getOperand(1)->getType())->getAddressSpace(); + + const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); + Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); + Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); // Memcpy forces the use of i8* for the source and destination. That means // that if you're using memcpy to move one double around, you'll get a cast @@ -167,8 +173,10 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { break; } - if (SrcETy->isSingleValueType()) - NewPtrTy = PointerType::getUnqual(SrcETy); + if (SrcETy->isSingleValueType()) { + NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp); + NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp); + } } } @@ -178,11 +186,12 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy); - Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign); + Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy); + Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy); + Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign); InsertNewInstBefore(L, *MI); - InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); + InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign), + *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); @@ -275,10 +284,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (GVSrc->isConstant()) { Module *M = CI.getParent()->getParent()->getParent(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; - const Type *Tys[1]; - Tys[0] = CI.getOperand(3)->getType(); + const Type *Tys[3] = { CI.getOperand(1)->getType(), + CI.getOperand(2)->getType(), + CI.getOperand(3)->getType() }; CI.setOperand(0, - Intrinsic::getDeclaration(M, MemCpyID, Tys, 1)); + Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); Changed = true; } } diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 27fd2ef..3918738 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -231,8 +231,7 @@ bool LoopUnswitch::processCurrentLoop() { // block that is branching on a loop-invariant condition, we can unswitch this // loop. for (Loop::block_iterator I = currentLoop->block_begin(), - E = currentLoop->block_end(); - I != E; ++I) { + E = currentLoop->block_end(); I != E; ++I) { TerminatorInst *TI = (*I)->getTerminator(); if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { // If this isn't branching on an invariant condition, we can't unswitch @@ -474,7 +473,6 @@ static inline void RemapInstruction(Instruction *I, static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM, LoopInfo *LI, LPPassManager *LPM) { Loop *New = new Loop(); - LPM->insertLoop(New, PL); // Add all of the blocks in L to the new loop. @@ -565,8 +563,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, /// SplitExitEdges - Split all of the edges from inside the loop to their exit /// blocks. Update the appropriate Phi nodes as we do so. void LoopUnswitch::SplitExitEdges(Loop *L, - const SmallVector<BasicBlock *, 8> &ExitBlocks) -{ + const SmallVector<BasicBlock *, 8> &ExitBlocks){ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; @@ -619,15 +616,15 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, NewBlocks.reserve(LoopBlocks.size()); DenseMap<const Value*, Value*> ValueMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { - BasicBlock *New = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F); - NewBlocks.push_back(New); - ValueMap[LoopBlocks[i]] = New; // Keep the BB mapping. - LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], New, L); + BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F); + NewBlocks.push_back(NewBB); + ValueMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. + LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); } // Splice the newly inserted blocks into the function right before the // original preheader. - F->getBasicBlockList().splice(LoopBlocks[0], F->getBasicBlockList(), + F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(), NewBlocks[0], F->end()); // Now we create the new Loop object for the versioned loop. @@ -652,8 +649,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // If the successor of the exit block had PHI nodes, add an entry for // NewExit. PHINode *PN; - for (BasicBlock::iterator I = ExitSucc->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) { + for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) { + PN = cast<PHINode>(I); Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V); if (It != ValueMap.end()) V = It->second; @@ -682,7 +679,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. - RewriteLoopBodyWithConditionConstant(L , LIC, Val, false); + RewriteLoopBodyWithConditionConstant(L, LIC, Val, false); // It's possible that simplifying one loop could cause the other to be // deleted. If so, don't simplify it. @@ -884,65 +881,66 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, U->replaceUsesOfWith(LIC, Replacement); Worklist.push_back(U); } - } else { - // Otherwise, we don't know the precise value of LIC, but we do know that it - // is certainly NOT "Val". As such, simplify any uses in the loop that we - // can. This case occurs when we unswitch switch statements. - for (unsigned i = 0, e = Users.size(); i != e; ++i) - if (Instruction *U = cast<Instruction>(Users[i])) { - if (!L->contains(U)) - continue; + SimplifyCode(Worklist, L); + return; + } + + // Otherwise, we don't know the precise value of LIC, but we do know that it + // is certainly NOT "Val". As such, simplify any uses in the loop that we + // can. This case occurs when we unswitch switch statements. + for (unsigned i = 0, e = Users.size(); i != e; ++i) { + Instruction *U = cast<Instruction>(Users[i]); + if (!L->contains(U)) + continue; - Worklist.push_back(U); + Worklist.push_back(U); - // If we know that LIC is not Val, use this info to simplify code. - if (SwitchInst *SI = dyn_cast<SwitchInst>(U)) { - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { - if (SI->getCaseValue(i) == Val) { - // Found a dead case value. Don't remove PHI nodes in the - // successor if they become single-entry, those PHI nodes may - // be in the Users list. - - // FIXME: This is a hack. We need to keep the successor around - // and hooked up so as to preserve the loop structure, because - // trying to update it is complicated. So instead we preserve the - // loop structure and put the block on a dead code path. - BasicBlock *Switch = SI->getParent(); - SplitEdge(Switch, SI->getSuccessor(i), this); - // Compute the successors instead of relying on the return value - // of SplitEdge, since it may have split the switch successor - // after PHI nodes. - BasicBlock *NewSISucc = SI->getSuccessor(i); - BasicBlock *OldSISucc = *succ_begin(NewSISucc); - // Create an "unreachable" destination. - BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", - Switch->getParent(), - OldSISucc); - new UnreachableInst(Context, Abort); - // Force the new case destination to branch to the "unreachable" - // block while maintaining a (dead) CFG edge to the old block. - NewSISucc->getTerminator()->eraseFromParent(); - BranchInst::Create(Abort, OldSISucc, - ConstantInt::getTrue(Context), NewSISucc); - // Release the PHI operands for this edge. - for (BasicBlock::iterator II = NewSISucc->begin(); - PHINode *PN = dyn_cast<PHINode>(II); ++II) - PN->setIncomingValue(PN->getBasicBlockIndex(Switch), - UndefValue::get(PN->getType())); - // Tell the domtree about the new block. We don't fully update the - // domtree here -- instead we force it to do a full recomputation - // after the pass is complete -- but we do need to inform it of - // new blocks. - if (DT) - DT->addNewBlock(Abort, NewSISucc); - break; - } - } - } + // TODO: We could do other simplifications, for example, turning + // 'icmp eq LIC, Val' -> false. + + // If we know that LIC is not Val, use this info to simplify code. + SwitchInst *SI = dyn_cast<SwitchInst>(U); + if (SI == 0 || !isa<ConstantInt>(Val)) continue; + + unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val)); + if (DeadCase == 0) continue; // Default case is live for multiple values. + + // Found a dead case value. Don't remove PHI nodes in the + // successor if they become single-entry, those PHI nodes may + // be in the Users list. - // TODO: We could do other simplifications, for example, turning - // LIC == Val -> false. - } + // FIXME: This is a hack. We need to keep the successor around + // and hooked up so as to preserve the loop structure, because + // trying to update it is complicated. So instead we preserve the + // loop structure and put the block on a dead code path. + BasicBlock *Switch = SI->getParent(); + SplitEdge(Switch, SI->getSuccessor(DeadCase), this); + // Compute the successors instead of relying on the return value + // of SplitEdge, since it may have split the switch successor + // after PHI nodes. + BasicBlock *NewSISucc = SI->getSuccessor(DeadCase); + BasicBlock *OldSISucc = *succ_begin(NewSISucc); + // Create an "unreachable" destination. + BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", + Switch->getParent(), + OldSISucc); + new UnreachableInst(Context, Abort); + // Force the new case destination to branch to the "unreachable" + // block while maintaining a (dead) CFG edge to the old block. + NewSISucc->getTerminator()->eraseFromParent(); + BranchInst::Create(Abort, OldSISucc, + ConstantInt::getTrue(Context), NewSISucc); + // Release the PHI operands for this edge. + for (BasicBlock::iterator II = NewSISucc->begin(); + PHINode *PN = dyn_cast<PHINode>(II); ++II) + PN->setIncomingValue(PN->getBasicBlockIndex(Switch), + UndefValue::get(PN->getType())); + // Tell the domtree about the new block. We don't fully update the + // domtree here -- instead we force it to do a full recomputation + // after the pass is complete -- but we do need to inform it of + // new blocks. + if (DT) + DT->addNewBlock(Abort, NewSISucc); } SimplifyCode(Worklist, L); @@ -1054,7 +1052,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { LPM->deleteSimpleAnalysisValue(Succ, L); Succ->eraseFromParent(); ++NumSimplify; - } else if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){ + break; + } + + if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){ // Conditional branch. Turn it into an unconditional branch, then // remove dead blocks. break; // FIXME: Enable. diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 62e2977..3b305ae 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -413,7 +413,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // interesting as a small compile-time optimization. Ranges.addStore(0, SI); - Function *MemSetF = 0; // Now that we have full information about ranges, loop over the ranges and // emit memset's for anything big enough to be worthwhile. @@ -433,29 +432,40 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // memset block. This ensure that the memset is dominated by any addressing // instruction needed by the start of the block. BasicBlock::iterator InsertPt = BI; - - if (MemSetF == 0) { - const Type *Ty = Type::getInt64Ty(Context); - MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1); - } - + // Get the starting pointer of the block. StartPtr = Range.StartPtr; - + + // Determine alignment + unsigned Alignment = Range.Alignment; + if (Alignment == 0) { + const Type *EltType = + cast<PointerType>(StartPtr->getType())->getElementType(); + Alignment = TD->getABITypeAlignment(EltType); + } + // Cast the start ptr to be i8* as memset requires. - const Type *i8Ptr = Type::getInt8PtrTy(Context); - if (StartPtr->getType() != i8Ptr) + const PointerType* StartPTy = cast<PointerType>(StartPtr->getType()); + const PointerType *i8Ptr = Type::getInt8PtrTy(Context, + StartPTy->getAddressSpace()); + if (StartPTy!= i8Ptr) StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(), InsertPt); - + Value *Ops[] = { StartPtr, ByteVal, // Start, value // size ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start), // align - ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment) + ConstantInt::get(Type::getInt32Ty(Context), Alignment), + // volatile + ConstantInt::get(Type::getInt1Ty(Context), 0), }; - Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt); + const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() }; + + Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2); + + Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt); DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i]; @@ -680,16 +690,19 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { return false; // If all checks passed, then we can transform these memcpy's - const Type *Ty = M->getLength()->getType(); + const Type *ArgTys[3] = { M->getRawDest()->getType(), + MDep->getRawSource()->getType(), + M->getLength()->getType() }; Function *MemCpyFun = Intrinsic::getDeclaration( M->getParent()->getParent()->getParent(), - M->getIntrinsicID(), &Ty, 1); + M->getIntrinsicID(), ArgTys, 3); - Value *Args[4] = { - M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst() + Value *Args[5] = { + M->getRawDest(), MDep->getRawSource(), M->getLength(), + M->getAlignmentCst(), M->getVolatileCst() }; - CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M); + CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M); // If C and M don't interfere, then this is a valid transformation. If they @@ -728,8 +741,10 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { // If not, then we know we can transform this. Module *Mod = M->getParent()->getParent()->getParent(); - const Type *Ty = M->getLength()->getType(); - M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1)); + const Type *ArgTys[3] = { M->getRawDest()->getType(), + M->getRawSource()->getType(), + M->getLength()->getType() }; + M->setOperand(0,Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3)); // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 546b7b6..4f09bee 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1521,45 +1521,48 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { } } + // Check to see if we have a branch or switch on an undefined value. If so + // we force the branch to go one way or the other to make the successor + // values live. It doesn't really matter which way we force it. TerminatorInst *TI = BB->getTerminator(); if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (!BI->isConditional()) continue; if (!getValueState(BI->getCondition()).isUndefined()) continue; - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + + // If the input to SCCP is actually branch on undef, fix the undef to + // false. + if (isa<UndefValue>(BI->getCondition())) { + BI->setCondition(ConstantInt::getFalse(BI->getContext())); + markEdgeExecutable(BB, TI->getSuccessor(1)); + return true; + } + + // Otherwise, it is a branch on a symbolic value which is currently + // considered to be undef. Handle this by forcing the input value to the + // branch to false. + markForcedConstant(BI->getCondition(), + ConstantInt::getFalse(TI->getContext())); + return true; + } + + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { if (SI->getNumSuccessors() < 2) // no cases continue; if (!getValueState(SI->getCondition()).isUndefined()) continue; - } else { - continue; - } - - // If the edge to the second successor isn't thought to be feasible yet, - // mark it so now. We pick the second one so that this goes to some - // enumerated value in a switch instead of going to the default destination. - if (KnownFeasibleEdges.count(Edge(BB, TI->getSuccessor(1)))) - continue; - - // Otherwise, it isn't already thought to be feasible. Mark it as such now - // and return. This will make other blocks reachable, which will allow new - // values to be discovered and existing ones to be moved in the lattice. - markEdgeExecutable(BB, TI->getSuccessor(1)); - - // This must be a conditional branch of switch on undef. At this point, - // force the old terminator to branch to the first successor. This is - // required because we are now influencing the dataflow of the function with - // the assumption that this edge is taken. If we leave the branch condition - // as undef, then further analysis could think the undef went another way - // leading to an inconsistent set of conclusions. - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - BI->setCondition(ConstantInt::getFalse(BI->getContext())); - } else { - SwitchInst *SI = cast<SwitchInst>(TI); - SI->setCondition(SI->getCaseValue(1)); + + // If the input to SCCP is actually switch on undef, fix the undef to + // the first constant. + if (isa<UndefValue>(SI->getCondition())) { + SI->setCondition(SI->getCaseValue(1)); + markEdgeExecutable(BB, TI->getSuccessor(1)); + return true; + } + + markForcedConstant(SI->getCondition(), SI->getCaseValue(1)); + return true; } - - return true; } return false; diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index bbe6270..6211beb 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -858,8 +858,17 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI); // Cast the other pointer (if we have one) to BytePtrTy. - if (OtherElt && OtherElt->getType() != BytePtrTy) - OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI); + if (OtherElt && OtherElt->getType() != BytePtrTy) { + // Preserve address space of OtherElt + const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType()); + const PointerType* PTy = cast<PointerType>(BytePtrTy); + if (OtherPTy->getElementType() != PTy->getElementType()) { + Type *NewOtherPTy = PointerType::get(PTy->getElementType(), + OtherPTy->getAddressSpace()); + OtherElt = new BitCastInst(OtherElt, NewOtherPTy, + OtherElt->getNameStr(), MI); + } + } unsigned EltSize = TD->getTypeAllocSize(EltTy); @@ -870,17 +879,28 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, SROADest ? OtherElt : EltPtr, // Src ptr ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size // Align - ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign) + ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign), + MI->getVolatileCst() }; - CallInst::Create(TheFn, Ops, Ops + 4, "", MI); + // In case we fold the address space overloaded memcpy of A to B + // with memcpy of B to C, change the function to be a memcpy of A to C. + const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(), + Ops[2]->getType() }; + Module *M = MI->getParent()->getParent()->getParent(); + TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3); + CallInst::Create(TheFn, Ops, Ops + 5, "", MI); } else { assert(isa<MemSetInst>(MI)); Value *Ops[] = { EltPtr, MI->getOperand(2), // Dest, Value, ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size - Zero // Align + Zero, // Align + ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile }; - CallInst::Create(TheFn, Ops, Ops + 4, "", MI); + const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() }; + Module *M = MI->getParent()->getParent()->getParent(); + TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2); + CallInst::Create(TheFn, Ops, Ops + 5, "", MI); } } DeadInsts.push_back(MI); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 5941ea6..b053cfc 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -142,7 +142,8 @@ struct StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. EmitMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B, TD); + ConstantInt::get(TD->getIntPtrType(*Context), Len+1), + 1, false, B, TD); } }; @@ -383,7 +384,8 @@ struct StrCpyOpt : public LibCallOptimization { CI->getOperand(3), B, TD); else EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); + ConstantInt::get(TD->getIntPtrType(*Context), Len), + 1, false, B, TD); return Dst; } }; @@ -411,8 +413,8 @@ struct StrNCpyOpt : public LibCallOptimization { if (SrcLen == 0) { // strncpy(x, "", y) -> memset(x, '\0', y, 1) - EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp, - B, TD); + EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), + LenOp, false, B, TD); return Dst; } @@ -432,7 +434,8 @@ struct StrNCpyOpt : public LibCallOptimization { // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); + ConstantInt::get(TD->getIntPtrType(*Context), Len), + 1, false, B, TD); return Dst; } @@ -593,7 +596,7 @@ struct MemCpyOpt : public LibCallOptimization { // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) EmitMemCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, B, TD); + CI->getOperand(3), 1, false, B, TD); return CI->getOperand(1); } }; @@ -615,7 +618,7 @@ struct MemMoveOpt : public LibCallOptimization { // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) EmitMemMove(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, B, TD); + CI->getOperand(3), 1, false, B, TD); return CI->getOperand(1); } }; @@ -637,8 +640,8 @@ struct MemSetOpt : public LibCallOptimization { // memset(p, v, n) -> llvm.memset(p, v, n, 1) Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), - false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD); + false); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD); return CI->getOperand(1); } }; @@ -999,7 +1002,7 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()+1), 1, B, TD); + FormatStr.size()+1), 1, false, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1013,11 +1016,11 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; Value *V = B.CreateTrunc(CI->getOperand(3), - Type::getInt8Ty(*Context), "char"); + Type::getInt8Ty(*Context), "char"); Value *Ptr = CastToCStr(CI->getOperand(1), B); B.CreateStore(V, Ptr); Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), - "nul"); + "nul"); B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr); return ConstantInt::get(CI->getType(), 1); @@ -1034,7 +1037,7 @@ struct SPrintFOpt : public LibCallOptimization { Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B, TD); + EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 1f62dab..2f1ae00 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -336,21 +336,19 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { if (Loop *L = LI->getLoopFor(Old)) L->addBasicBlockToLoop(New, LI->getBase()); - if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) - { - // Old dominates New. New node domiantes all other nodes dominated by Old. - DomTreeNode *OldNode = DT->getNode(Old); - std::vector<DomTreeNode *> Children; - for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); - I != E; ++I) - Children.push_back(*I); - - DomTreeNode *NewNode = DT->addNewBlock(New,Old); - + if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + // Old dominates New. New node domiantes all other nodes dominated by Old. + DomTreeNode *OldNode = DT->getNode(Old); + std::vector<DomTreeNode *> Children; + for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); + I != E; ++I) + Children.push_back(*I); + + DomTreeNode *NewNode = DT->addNewBlock(New,Old); for (std::vector<DomTreeNode *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) DT->changeImmediateDominator(*I, NewNode); - } + } if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>()) DF->splitBlock(Old); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 0afccf4..fff8179 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -109,15 +109,16 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// EmitMemCpy - Emit a call to the memcpy function to the builder. This always /// expects that Len has type 'intptr_t' and Dst/Src are pointers. -Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, + bool isVolatile, IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - const Type *Ty = Len->getType(); - Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1); + const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() }; + Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); - return B.CreateCall4(MemCpy, Dst, Src, Len, - ConstantInt::get(B.getInt32Ty(), Align)); + return B.CreateCall5(MemCpy, Dst, Src, Len, + ConstantInt::get(B.getInt32Ty(), Align), + ConstantInt::get(B.getInt1Ty(), isVolatile)); } /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder. @@ -146,16 +147,18 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemMove - Emit a call to the memmove function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. -Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align, + bool isVolatile, IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); LLVMContext &Context = B.GetInsertBlock()->getContext(); - const Type *Ty = TD->getIntPtrType(Context); - Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1); + const Type *ArgTys[3] = { Dst->getType(), Src->getType(), + TD->getIntPtrType(Context) }; + Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, ArgTys, 3); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); Value *A = ConstantInt::get(B.getInt32Ty(), Align); - return B.CreateCall4(MemMove, Dst, Src, Len, A); + Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile); + return B.CreateCall5(MemMove, Dst, Src, Len, A, Vol); } /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is @@ -206,15 +209,15 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, } /// EmitMemSet - Emit a call to the memset function -Value *llvm::EmitMemSet(Value *Dst, Value *Val, - Value *Len, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile, + IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); Intrinsic::ID IID = Intrinsic::memset; - const Type *Tys[1]; - Tys[0] = Len->getType(); - Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); + const Type *Tys[2] = { Dst->getType(), Len->getType() }; + Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 2); Value *Align = ConstantInt::get(B.getInt32Ty(), 1); - return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align); + Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile); + return B.CreateCall5(MemSet, CastToCStr(Dst, B), Val, Len, Align, Vol); } /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. @@ -381,7 +384,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { if (Name == "__memcpy_chk") { if (isFoldable(4, 3, false)) { EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), - 1, B, TD); + 1, false, B, TD); replaceCall(CI->getOperand(1)); return true; } @@ -396,7 +399,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { if (Name == "__memmove_chk") { if (isFoldable(4, 3, false)) { EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), - 1, B, TD); + 1, false, B, TD); replaceCall(CI->getOperand(1)); return true; } @@ -407,7 +410,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { if (isFoldable(4, 3, false)) { Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD); replaceCall(CI->getOperand(1)); return true; } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 17f8827..75c9ccd 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -297,10 +297,10 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, I->getName(), &*Caller->begin()->begin()); // Emit a memcpy. - const Type *Tys[] = { Type::getInt64Ty(Context) }; + const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), Intrinsic::memcpy, - Tys, 1); + Tys, 3); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall); @@ -309,17 +309,18 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, Size = ConstantExpr::getSizeOf(AggTy); else Size = ConstantInt::get(Type::getInt64Ty(Context), - TD->getTypeStoreSize(AggTy)); + TD->getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. Value *CallArgs[] = { DestCast, SrcCast, Size, - ConstantInt::get(Type::getInt32Ty(Context), 1) + ConstantInt::get(Type::getInt32Ty(Context), 1), + ConstantInt::get(Type::getInt1Ty(Context), 0) }; CallInst *TheMemCpy = - CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall); + CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); // If we have a call graph, update it. if (CG) { |