Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp     32
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp              141
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp            55
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                       61
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp       32
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp           29
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp             22
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp               43
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp              11
9 files changed, 240 insertions, 186 deletions
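
Across all nine files, this patch moves the transforms onto the new form of the llvm.memcpy/llvm.memmove/llvm.memset intrinsics: each call site now supplies the destination pointer type, the source pointer type (for the transfer intrinsics), and the length type as overload parameters, and passes a fifth i1 operand for volatility after the alignment. For plain address-space-0 i8 pointers and an i64 length, the resulting declaration is mangled along the lines of @llvm.memcpy.p0i8.p0i8.i64. A minimal sketch of the emission pattern, assuming the LLVM 2.7-era C++ API used throughout this diff (header locations, const Type* overload lists, and IRBuilder::CreateCall5); the helper name is illustrative:

// Sketch: emit llvm.memcpy in its new five-operand, triple-overloaded form.
// The API calls mirror the hunks below.
#include "llvm/Constants.h"
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"
#include "llvm/Support/IRBuilder.h"

using namespace llvm;

static Value *EmitNewStyleMemCpy(Value *Dst, Value *Src, Value *Len,
                                 unsigned Align, bool isVolatile,
                                 IRBuilder<> &B) {
  Module *M = B.GetInsertBlock()->getParent()->getParent();
  // Overload on dest pointer, source pointer and length types so the address
  // space of each pointer is encoded in the declaration.
  const Type *Tys[3] = { Dst->getType(), Src->getType(), Len->getType() };
  Function *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys, 3);
  // Operands: dest, src, length, alignment (i32), isVolatile (i1).
  return B.CreateCall5(MemCpy, Dst, Src, Len,
                       ConstantInt::get(B.getInt32Ty(), Align),
                       ConstantInt::get(B.getInt1Ty(), isVolatile));
}
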
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 76c815d..e025b05 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -136,8 +136,14 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
return 0; // If not 1/2/4/8 bytes, exit.
// Use an integer load+store unless we can find something better.
- Type *NewPtrTy =
- PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3));
+ unsigned SrcAddrSp =
+ cast<PointerType>(MI->getOperand(2)->getType())->getAddressSpace();
+ unsigned DstAddrSp =
+ cast<PointerType>(MI->getOperand(1)->getType())->getAddressSpace();
+
+ const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
+ Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
+ Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
// Memcpy forces the use of i8* for the source and destination. That means
// that if you're using memcpy to move one double around, you'll get a cast
@@ -167,8 +173,10 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
break;
}
- if (SrcETy->isSingleValueType())
- NewPtrTy = PointerType::getUnqual(SrcETy);
+ if (SrcETy->isSingleValueType()) {
+ NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
+ NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
+ }
}
}
@@ -178,11 +186,12 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
SrcAlign = std::max(SrcAlign, CopyAlign);
DstAlign = std::max(DstAlign, CopyAlign);
- Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
- Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
- Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
+ Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy);
+ Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy);
+ Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign);
InsertNewInstBefore(L, *MI);
- InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
+ InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign),
+ *MI);
// Set the size of the copy to 0, it will be deleted on the next iteration.
MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
@@ -275,10 +284,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (GVSrc->isConstant()) {
Module *M = CI.getParent()->getParent()->getParent();
Intrinsic::ID MemCpyID = Intrinsic::memcpy;
- const Type *Tys[1];
- Tys[0] = CI.getOperand(3)->getType();
+ const Type *Tys[3] = { CI.getOperand(1)->getType(),
+ CI.getOperand(2)->getType(),
+ CI.getOperand(3)->getType() };
CI.setOperand(0,
- Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
+ Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
Changed = true;
}
}
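
In InstCombine, SimplifyMemTransfer previously forced one unqualified pointer type onto both operands; it now tracks separate source and destination pointer types, so a small fixed-size copy between different address spaces still lowers to a correct load/store pair, and the pair inherits the intrinsic's volatile flag. The visitCallInst hunk likewise re-declares memcpy with all three overloaded types. A condensed sketch of that lowering, assuming the LLVM 2.7-era API, the isVolatile() accessor introduced alongside this change, and a caller that has already verified the copy is 1/2/4/8 bytes of a single value type EltTy:

// Sketch: lower a known-small memcpy to one load plus one store without
// losing the address space of either pointer or the volatile flag.
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"

using namespace llvm;

static void LowerSmallMemTransfer(MemTransferInst *MI, const Type *EltTy,
                                  unsigned SrcAlign, unsigned DstAlign) {
  unsigned SrcAS =
      cast<PointerType>(MI->getRawSource()->getType())->getAddressSpace();
  unsigned DstAS =
      cast<PointerType>(MI->getRawDest()->getType())->getAddressSpace();
  // Cast each operand to a pointer-to-EltTy in its own address space.
  Value *Src = new BitCastInst(MI->getRawSource(),
                               PointerType::get(EltTy, SrcAS), "", MI);
  Value *Dst = new BitCastInst(MI->getRawDest(),
                               PointerType::get(EltTy, DstAS), "", MI);
  LoadInst *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign, MI);
  new StoreInst(L, Dst, MI->isVolatile(), DstAlign, MI);
}
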
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 27fd2ef..3918738 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -231,8 +231,7 @@ bool LoopUnswitch::processCurrentLoop() {
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
for (Loop::block_iterator I = currentLoop->block_begin(),
- E = currentLoop->block_end();
- I != E; ++I) {
+ E = currentLoop->block_end(); I != E; ++I) {
TerminatorInst *TI = (*I)->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
// If this isn't branching on an invariant condition, we can't unswitch
@@ -474,7 +473,6 @@ static inline void RemapInstruction(Instruction *I,
static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM,
LoopInfo *LI, LPPassManager *LPM) {
Loop *New = new Loop();
-
LPM->insertLoop(New, PL);
// Add all of the blocks in L to the new loop.
@@ -565,8 +563,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
/// SplitExitEdges - Split all of the edges from inside the loop to their exit
/// blocks. Update the appropriate Phi nodes as we do so.
void LoopUnswitch::SplitExitEdges(Loop *L,
- const SmallVector<BasicBlock *, 8> &ExitBlocks)
-{
+ const SmallVector<BasicBlock *, 8> &ExitBlocks){
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
@@ -619,15 +616,15 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
NewBlocks.reserve(LoopBlocks.size());
DenseMap<const Value*, Value*> ValueMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
- BasicBlock *New = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
- NewBlocks.push_back(New);
- ValueMap[LoopBlocks[i]] = New; // Keep the BB mapping.
- LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], New, L);
+ BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
+ NewBlocks.push_back(NewBB);
+ ValueMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
+ LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
}
// Splice the newly inserted blocks into the function right before the
// original preheader.
- F->getBasicBlockList().splice(LoopBlocks[0], F->getBasicBlockList(),
+ F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
NewBlocks[0], F->end());
// Now we create the new Loop object for the versioned loop.
@@ -652,8 +649,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// If the successor of the exit block had PHI nodes, add an entry for
// NewExit.
PHINode *PN;
- for (BasicBlock::iterator I = ExitSucc->begin();
- (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
+ PN = cast<PHINode>(I);
Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V);
if (It != ValueMap.end()) V = It->second;
@@ -682,7 +679,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// Now we rewrite the original code to know that the condition is true and the
// new code to know that the condition is false.
- RewriteLoopBodyWithConditionConstant(L , LIC, Val, false);
+ RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
// It's possible that simplifying one loop could cause the other to be
// deleted. If so, don't simplify it.
@@ -884,65 +881,66 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
U->replaceUsesOfWith(LIC, Replacement);
Worklist.push_back(U);
}
- } else {
- // Otherwise, we don't know the precise value of LIC, but we do know that it
- // is certainly NOT "Val". As such, simplify any uses in the loop that we
- // can. This case occurs when we unswitch switch statements.
- for (unsigned i = 0, e = Users.size(); i != e; ++i)
- if (Instruction *U = cast<Instruction>(Users[i])) {
- if (!L->contains(U))
- continue;
+ SimplifyCode(Worklist, L);
+ return;
+ }
+
+ // Otherwise, we don't know the precise value of LIC, but we do know that it
+ // is certainly NOT "Val". As such, simplify any uses in the loop that we
+ // can. This case occurs when we unswitch switch statements.
+ for (unsigned i = 0, e = Users.size(); i != e; ++i) {
+ Instruction *U = cast<Instruction>(Users[i]);
+ if (!L->contains(U))
+ continue;
- Worklist.push_back(U);
+ Worklist.push_back(U);
- // If we know that LIC is not Val, use this info to simplify code.
- if (SwitchInst *SI = dyn_cast<SwitchInst>(U)) {
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
- if (SI->getCaseValue(i) == Val) {
- // Found a dead case value. Don't remove PHI nodes in the
- // successor if they become single-entry, those PHI nodes may
- // be in the Users list.
-
- // FIXME: This is a hack. We need to keep the successor around
- // and hooked up so as to preserve the loop structure, because
- // trying to update it is complicated. So instead we preserve the
- // loop structure and put the block on a dead code path.
- BasicBlock *Switch = SI->getParent();
- SplitEdge(Switch, SI->getSuccessor(i), this);
- // Compute the successors instead of relying on the return value
- // of SplitEdge, since it may have split the switch successor
- // after PHI nodes.
- BasicBlock *NewSISucc = SI->getSuccessor(i);
- BasicBlock *OldSISucc = *succ_begin(NewSISucc);
- // Create an "unreachable" destination.
- BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
- Switch->getParent(),
- OldSISucc);
- new UnreachableInst(Context, Abort);
- // Force the new case destination to branch to the "unreachable"
- // block while maintaining a (dead) CFG edge to the old block.
- NewSISucc->getTerminator()->eraseFromParent();
- BranchInst::Create(Abort, OldSISucc,
- ConstantInt::getTrue(Context), NewSISucc);
- // Release the PHI operands for this edge.
- for (BasicBlock::iterator II = NewSISucc->begin();
- PHINode *PN = dyn_cast<PHINode>(II); ++II)
- PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
- UndefValue::get(PN->getType()));
- // Tell the domtree about the new block. We don't fully update the
- // domtree here -- instead we force it to do a full recomputation
- // after the pass is complete -- but we do need to inform it of
- // new blocks.
- if (DT)
- DT->addNewBlock(Abort, NewSISucc);
- break;
- }
- }
- }
+ // TODO: We could do other simplifications, for example, turning
+ // 'icmp eq LIC, Val' -> false.
+
+ // If we know that LIC is not Val, use this info to simplify code.
+ SwitchInst *SI = dyn_cast<SwitchInst>(U);
+ if (SI == 0 || !isa<ConstantInt>(Val)) continue;
+
+ unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+ if (DeadCase == 0) continue; // Default case is live for multiple values.
+
+ // Found a dead case value. Don't remove PHI nodes in the
+ // successor if they become single-entry, those PHI nodes may
+ // be in the Users list.
- // TODO: We could do other simplifications, for example, turning
- // LIC == Val -> false.
- }
+ // FIXME: This is a hack. We need to keep the successor around
+ // and hooked up so as to preserve the loop structure, because
+ // trying to update it is complicated. So instead we preserve the
+ // loop structure and put the block on a dead code path.
+ BasicBlock *Switch = SI->getParent();
+ SplitEdge(Switch, SI->getSuccessor(DeadCase), this);
+ // Compute the successors instead of relying on the return value
+ // of SplitEdge, since it may have split the switch successor
+ // after PHI nodes.
+ BasicBlock *NewSISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *OldSISucc = *succ_begin(NewSISucc);
+ // Create an "unreachable" destination.
+ BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
+ Switch->getParent(),
+ OldSISucc);
+ new UnreachableInst(Context, Abort);
+ // Force the new case destination to branch to the "unreachable"
+ // block while maintaining a (dead) CFG edge to the old block.
+ NewSISucc->getTerminator()->eraseFromParent();
+ BranchInst::Create(Abort, OldSISucc,
+ ConstantInt::getTrue(Context), NewSISucc);
+ // Release the PHI operands for this edge.
+ for (BasicBlock::iterator II = NewSISucc->begin();
+ PHINode *PN = dyn_cast<PHINode>(II); ++II)
+ PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
+ UndefValue::get(PN->getType()));
+ // Tell the domtree about the new block. We don't fully update the
+ // domtree here -- instead we force it to do a full recomputation
+ // after the pass is complete -- but we do need to inform it of
+ // new blocks.
+ if (DT)
+ DT->addNewBlock(Abort, NewSISucc);
}
SimplifyCode(Worklist, L);
@@ -1054,7 +1052,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
LPM->deleteSimpleAnalysisValue(Succ, L);
Succ->eraseFromParent();
++NumSimplify;
- } else if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
+ break;
+ }
+
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
// Conditional branch. Turn it into an unconditional branch, then
// remove dead blocks.
break; // FIXME: Enable.
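
The substantive change in LoopUnswitch is in RewriteLoopBodyWithConditionConstant: instead of scanning every case by hand, the pass asks the switch for the case index matching the known-impossible value, treats index 0 (the default destination) as "nothing to kill", and then reroutes the one dead edge through a fresh unreachable block so the loop structure stays intact. A small sketch of the lookup, assuming the LLVM 2.7-era SwitchInst API in which findCaseValue returns an unsigned case index; the helper name is illustrative:

// Sketch: find the switch case that is dead once we know the unswitched
// condition cannot equal Val. Index 0 is the default destination and is
// never treated as dead here.
#include "llvm/Constants.h"
#include "llvm/Instructions.h"

using namespace llvm;

static unsigned FindDeadCase(SwitchInst *SI, Constant *Val) {
  ConstantInt *CVal = dyn_cast<ConstantInt>(Val);
  if (!CVal)
    return 0;                      // Only integer case values can match.
  return SI->findCaseValue(CVal);  // 0 => only the default could be taken.
}
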
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 62e2977..3b305ae 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -413,7 +413,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// interesting as a small compile-time optimization.
Ranges.addStore(0, SI);
- Function *MemSetF = 0;
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
@@ -433,29 +432,40 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// memset block. This ensure that the memset is dominated by any addressing
// instruction needed by the start of the block.
BasicBlock::iterator InsertPt = BI;
-
- if (MemSetF == 0) {
- const Type *Ty = Type::getInt64Ty(Context);
- MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1);
- }
-
+
// Get the starting pointer of the block.
StartPtr = Range.StartPtr;
-
+
+ // Determine alignment
+ unsigned Alignment = Range.Alignment;
+ if (Alignment == 0) {
+ const Type *EltType =
+ cast<PointerType>(StartPtr->getType())->getElementType();
+ Alignment = TD->getABITypeAlignment(EltType);
+ }
+
// Cast the start ptr to be i8* as memset requires.
- const Type *i8Ptr = Type::getInt8PtrTy(Context);
- if (StartPtr->getType() != i8Ptr)
+ const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
+ const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
+ StartPTy->getAddressSpace());
+ if (StartPTy!= i8Ptr)
StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
InsertPt);
-
+
Value *Ops[] = {
StartPtr, ByteVal, // Start, value
// size
ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
// align
- ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment)
+ ConstantInt::get(Type::getInt32Ty(Context), Alignment),
+ // volatile
+ ConstantInt::get(Type::getInt1Ty(Context), 0),
};
- Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
+ const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
+
+ Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+
+ Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
DEBUG(dbgs() << "Replace stores:\n";
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
dbgs() << *Range.TheStores[i];
@@ -680,16 +690,19 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
return false;
// If all checks passed, then we can transform these memcpy's
- const Type *Ty = M->getLength()->getType();
+ const Type *ArgTys[3] = { M->getRawDest()->getType(),
+ MDep->getRawSource()->getType(),
+ M->getLength()->getType() };
Function *MemCpyFun = Intrinsic::getDeclaration(
M->getParent()->getParent()->getParent(),
- M->getIntrinsicID(), &Ty, 1);
+ M->getIntrinsicID(), ArgTys, 3);
- Value *Args[4] = {
- M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
+ Value *Args[5] = {
+ M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ M->getAlignmentCst(), M->getVolatileCst()
};
- CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
+ CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
// If C and M don't interfere, then this is a valid transformation. If they
@@ -728,8 +741,10 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
// If not, then we know we can transform this.
Module *Mod = M->getParent()->getParent()->getParent();
- const Type *Ty = M->getLength()->getType();
- M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1));
+ const Type *ArgTys[3] = { M->getRawDest()->getType(),
+ M->getRawSource()->getType(),
+ M->getLength()->getType() };
+ M->setOperand(0,Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3));
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
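
Two things change in MemCpyOpt beyond the extra volatile operand: the memset declaration can no longer be cached across ranges, since it now depends on the start pointer's type and address space, and a range whose stores recorded no alignment falls back to the ABI alignment of the pointee type. A minimal sketch of that fallback, assuming the LLVM 2.7-era TargetData interface; the helper name is illustrative:

// Sketch: pick the alignment for a synthesized memset. When the merged
// stores recorded no alignment (0), use the ABI alignment of the pointee.
#include "llvm/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Value.h"

using namespace llvm;

static unsigned MemsetAlignment(const Value *StartPtr, unsigned RangeAlign,
                                const TargetData &TD) {
  if (RangeAlign != 0)
    return RangeAlign;
  const Type *EltTy =
      cast<PointerType>(StartPtr->getType())->getElementType();
  return TD.getABITypeAlignment(EltTy);
}
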
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 546b7b6..4f09bee 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1521,45 +1521,48 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
}
}
+ // Check to see if we have a branch or switch on an undefined value. If so
+ // we force the branch to go one way or the other to make the successor
+ // values live. It doesn't really matter which way we force it.
TerminatorInst *TI = BB->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (!BI->isConditional()) continue;
if (!getValueState(BI->getCondition()).isUndefined())
continue;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+
+ // If the input to SCCP is actually branch on undef, fix the undef to
+ // false.
+ if (isa<UndefValue>(BI->getCondition())) {
+ BI->setCondition(ConstantInt::getFalse(BI->getContext()));
+ markEdgeExecutable(BB, TI->getSuccessor(1));
+ return true;
+ }
+
+ // Otherwise, it is a branch on a symbolic value which is currently
+ // considered to be undef. Handle this by forcing the input value to the
+ // branch to false.
+ markForcedConstant(BI->getCondition(),
+ ConstantInt::getFalse(TI->getContext()));
+ return true;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
if (SI->getNumSuccessors() < 2) // no cases
continue;
if (!getValueState(SI->getCondition()).isUndefined())
continue;
- } else {
- continue;
- }
-
- // If the edge to the second successor isn't thought to be feasible yet,
- // mark it so now. We pick the second one so that this goes to some
- // enumerated value in a switch instead of going to the default destination.
- if (KnownFeasibleEdges.count(Edge(BB, TI->getSuccessor(1))))
- continue;
-
- // Otherwise, it isn't already thought to be feasible. Mark it as such now
- // and return. This will make other blocks reachable, which will allow new
- // values to be discovered and existing ones to be moved in the lattice.
- markEdgeExecutable(BB, TI->getSuccessor(1));
-
- // This must be a conditional branch of switch on undef. At this point,
- // force the old terminator to branch to the first successor. This is
- // required because we are now influencing the dataflow of the function with
- // the assumption that this edge is taken. If we leave the branch condition
- // as undef, then further analysis could think the undef went another way
- // leading to an inconsistent set of conclusions.
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- BI->setCondition(ConstantInt::getFalse(BI->getContext()));
- } else {
- SwitchInst *SI = cast<SwitchInst>(TI);
- SI->setCondition(SI->getCaseValue(1));
+
+ // If the input to SCCP is actually switch on undef, fix the undef to
+ // the first constant.
+ if (isa<UndefValue>(SI->getCondition())) {
+ SI->setCondition(SI->getCaseValue(1));
+ markEdgeExecutable(BB, TI->getSuccessor(1));
+ return true;
+ }
+
+ markForcedConstant(SI->getCondition(), SI->getCaseValue(1));
+ return true;
}
-
- return true;
}
return false;
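
ResolvedUndefsIn now distinguishes two situations instead of folding them together: a terminator whose condition is literally undef is rewritten in place (branches get false, switches get their first case value) and the corresponding edge is marked executable, while a condition that is merely still undefined in the lattice is pinned with markForcedConstant so later analysis cannot assume the undef went the other way. A minimal sketch of the literal-undef half, which uses only public IR APIs; the lattice half lives inside the solver and is not reproduced here:

// Sketch: rewrite 'br i1 undef, %T, %F' to branch on false, matching the
// choice the solver makes above before marking the edge executable.
#include "llvm/Constants.h"
#include "llvm/Instructions.h"

using namespace llvm;

static bool FixBranchOnLiteralUndef(BranchInst *BI) {
  if (!BI->isConditional() || !isa<UndefValue>(BI->getCondition()))
    return false;
  BI->setCondition(ConstantInt::getFalse(BI->getContext()));
  return true;
}
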
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index bbe6270..6211beb 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -858,8 +858,17 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
// Cast the other pointer (if we have one) to BytePtrTy.
- if (OtherElt && OtherElt->getType() != BytePtrTy)
- OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI);
+ if (OtherElt && OtherElt->getType() != BytePtrTy) {
+ // Preserve address space of OtherElt
+ const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
+ const PointerType* PTy = cast<PointerType>(BytePtrTy);
+ if (OtherPTy->getElementType() != PTy->getElementType()) {
+ Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
+ OtherPTy->getAddressSpace());
+ OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
+ OtherElt->getNameStr(), MI);
+ }
+ }
unsigned EltSize = TD->getTypeAllocSize(EltTy);
@@ -870,17 +879,28 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
SROADest ? OtherElt : EltPtr, // Src ptr
ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
// Align
- ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
+ ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
+ MI->getVolatileCst()
};
- CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+ // In case we fold the address space overloaded memcpy of A to B
+ // with memcpy of B to C, change the function to be a memcpy of A to C.
+ const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
+ Ops[2]->getType() };
+ Module *M = MI->getParent()->getParent()->getParent();
+ TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
+ CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
} else {
assert(isa<MemSetInst>(MI));
Value *Ops[] = {
EltPtr, MI->getOperand(2), // Dest, Value,
ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
- Zero // Align
+ Zero, // Align
+ ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
};
- CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+ const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
+ Module *M = MI->getParent()->getParent()->getParent();
+ TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+ CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
}
}
DeadInsts.push_back(MI);
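
The SROA hunks apply the same idea per scalarized element: the pointer on the non-alloca side of a rewritten memcpy/memmove keeps its own address space, and the intrinsic is re-declared from the per-call operand types, with the original volatile constant (or an explicit i1 false for memset) appended. A small sketch of the address-space-preserving cast, with a hypothetical helper name, assuming the LLVM 2.7-era API:

// Sketch: cast a pointer to i8* without discarding its address space, the
// pattern used above for the non-alloca side of the rewritten intrinsic.
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"

using namespace llvm;

static Value *CastToInt8PtrPreservingAS(Value *Ptr,
                                        Instruction *InsertBefore) {
  const PointerType *PTy = cast<PointerType>(Ptr->getType());
  const Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ptr->getContext()),
                                       PTy->getAddressSpace());
  if (Ptr->getType() == I8Ptr)
    return Ptr;
  return new BitCastInst(Ptr, I8Ptr, Ptr->getName(), InsertBefore);
}
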
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 5941ea6..b053cfc 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -142,7 +142,8 @@ struct StrCatOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
EmitMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B, TD);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len+1),
+ 1, false, B, TD);
}
};
@@ -383,7 +384,8 @@ struct StrCpyOpt : public LibCallOptimization {
CI->getOperand(3), B, TD);
else
EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ 1, false, B, TD);
return Dst;
}
};
@@ -411,8 +413,8 @@ struct StrNCpyOpt : public LibCallOptimization {
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(x, '\0', y, 1)
- EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp,
- B, TD);
+ EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'),
+ LenOp, false, B, TD);
return Dst;
}
@@ -432,7 +434,8 @@ struct StrNCpyOpt : public LibCallOptimization {
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ 1, false, B, TD);
return Dst;
}
@@ -593,7 +596,7 @@ struct MemCpyOpt : public LibCallOptimization {
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
EmitMemCpy(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), 1, B, TD);
+ CI->getOperand(3), 1, false, B, TD);
return CI->getOperand(1);
}
};
@@ -615,7 +618,7 @@ struct MemMoveOpt : public LibCallOptimization {
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
EmitMemMove(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), 1, B, TD);
+ CI->getOperand(3), 1, false, B, TD);
return CI->getOperand(1);
}
};
@@ -637,8 +640,8 @@ struct MemSetOpt : public LibCallOptimization {
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
- false);
- EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD);
+ false);
+ EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD);
return CI->getOperand(1);
}
};
@@ -999,7 +1002,7 @@ struct SPrintFOpt : public LibCallOptimization {
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()+1), 1, B, TD);
+ FormatStr.size()+1), 1, false, B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1013,11 +1016,11 @@ struct SPrintFOpt : public LibCallOptimization {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
Value *V = B.CreateTrunc(CI->getOperand(3),
- Type::getInt8Ty(*Context), "char");
+ Type::getInt8Ty(*Context), "char");
Value *Ptr = CastToCStr(CI->getOperand(1), B);
B.CreateStore(V, Ptr);
Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
- "nul");
+ "nul");
B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);
return ConstantInt::get(CI->getType(), 1);
@@ -1034,7 +1037,7 @@ struct SPrintFOpt : public LibCallOptimization {
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B, TD);
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 1f62dab..2f1ae00 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -336,21 +336,19 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
if (Loop *L = LI->getLoopFor(Old))
L->addBasicBlockToLoop(New, LI->getBase());
- if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>())
- {
- // Old dominates New. New node domiantes all other nodes dominated by Old.
- DomTreeNode *OldNode = DT->getNode(Old);
- std::vector<DomTreeNode *> Children;
- for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
- I != E; ++I)
- Children.push_back(*I);
-
- DomTreeNode *NewNode = DT->addNewBlock(New,Old);
-
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ // Old dominates New. New node domiantes all other nodes dominated by Old.
+ DomTreeNode *OldNode = DT->getNode(Old);
+ std::vector<DomTreeNode *> Children;
+ for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
+ I != E; ++I)
+ Children.push_back(*I);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New,Old);
for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
E = Children.end(); I != E; ++I)
DT->changeImmediateDominator(*I, NewNode);
- }
+ }
if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>())
DF->splitBlock(Old);
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 0afccf4..fff8179 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -109,15 +109,16 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
/// expects that Len has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
- unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align,
+ bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
- const Type *Ty = Len->getType();
- Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1);
+ const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() };
+ Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3);
Dst = CastToCStr(Dst, B);
Src = CastToCStr(Src, B);
- return B.CreateCall4(MemCpy, Dst, Src, Len,
- ConstantInt::get(B.getInt32Ty(), Align));
+ return B.CreateCall5(MemCpy, Dst, Src, Len,
+ ConstantInt::get(B.getInt32Ty(), Align),
+ ConstantInt::get(B.getInt1Ty(), isVolatile));
}
/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
@@ -146,16 +147,18 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
/// EmitMemMove - Emit a call to the memmove function to the builder. This
/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len,
- unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align,
+ bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
LLVMContext &Context = B.GetInsertBlock()->getContext();
- const Type *Ty = TD->getIntPtrType(Context);
- Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
+ const Type *ArgTys[3] = { Dst->getType(), Src->getType(),
+ TD->getIntPtrType(Context) };
+ Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, ArgTys, 3);
Dst = CastToCStr(Dst, B);
Src = CastToCStr(Src, B);
Value *A = ConstantInt::get(B.getInt32Ty(), Align);
- return B.CreateCall4(MemMove, Dst, Src, Len, A);
+ Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
+ return B.CreateCall5(MemMove, Dst, Src, Len, A, Vol);
}
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
@@ -206,15 +209,15 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
}
/// EmitMemSet - Emit a call to the memset function
-Value *llvm::EmitMemSet(Value *Dst, Value *Val,
- Value *Len, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile,
+ IRBuilder<> &B, const TargetData *TD) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
Intrinsic::ID IID = Intrinsic::memset;
- const Type *Tys[1];
- Tys[0] = Len->getType();
- Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1);
+ const Type *Tys[2] = { Dst->getType(), Len->getType() };
+ Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 2);
Value *Align = ConstantInt::get(B.getInt32Ty(), 1);
- return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);
+ Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
+ return B.CreateCall5(MemSet, CastToCStr(Dst, B), Val, Len, Align, Vol);
}
/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
@@ -381,7 +384,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
if (Name == "__memcpy_chk") {
if (isFoldable(4, 3, false)) {
EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
- 1, B, TD);
+ 1, false, B, TD);
replaceCall(CI->getOperand(1));
return true;
}
@@ -396,7 +399,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
if (Name == "__memmove_chk") {
if (isFoldable(4, 3, false)) {
EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
- 1, B, TD);
+ 1, false, B, TD);
replaceCall(CI->getOperand(1));
return true;
}
@@ -407,7 +410,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
if (isFoldable(4, 3, false)) {
Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(),
false);
- EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD);
+ EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD);
replaceCall(CI->getOperand(1));
return true;
}
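
BuildLibCalls is where the public helpers change shape: EmitMemCpy, EmitMemMove and EmitMemSet each gain an isVolatile parameter and now hand the pointer types to Intrinsic::getDeclaration, which is why every caller in SimplifyLibCalls above simply adds a false argument. A usage sketch, assuming the updated declarations from llvm/Transforms/Utils/BuildLibCalls.h; the helper name and the known-length scenario are illustrative:

// Sketch: a libcall simplification calling the updated helper, copying Len
// bytes plus the nul terminator with alignment 1 and no volatility.
#include "llvm/Constants.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

using namespace llvm;

static Value *CopyKnownLengthString(Value *Dst, Value *Src, uint64_t Len,
                                    IRBuilder<> &B, const TargetData *TD) {
  LLVMContext &Ctx = B.GetInsertBlock()->getContext();
  return EmitMemCpy(Dst, Src,
                    ConstantInt::get(TD->getIntPtrType(Ctx), Len + 1),
                    /*Align=*/1, /*isVolatile=*/false, B, TD);
}
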
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 17f8827..75c9ccd 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -297,10 +297,10 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
I->getName(),
&*Caller->begin()->begin());
// Emit a memcpy.
- const Type *Tys[] = { Type::getInt64Ty(Context) };
+ const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
Intrinsic::memcpy,
- Tys, 1);
+ Tys, 3);
Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
@@ -309,17 +309,18 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
Size = ConstantExpr::getSizeOf(AggTy);
else
Size = ConstantInt::get(Type::getInt64Ty(Context),
- TD->getTypeStoreSize(AggTy));
+ TD->getTypeStoreSize(AggTy));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
// better alignment.
Value *CallArgs[] = {
DestCast, SrcCast, Size,
- ConstantInt::get(Type::getInt32Ty(Context), 1)
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::get(Type::getInt1Ty(Context), 0)
};
CallInst *TheMemCpy =
- CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);
+ CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
// If we have a call graph, update it.
if (CG) {