Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r-- | lib/Transforms/Scalar/LoopUnswitch.cpp          | 141
-rw-r--r-- | lib/Transforms/Scalar/MemCpyOptimizer.cpp       |  55
-rw-r--r-- | lib/Transforms/Scalar/SCCP.cpp                  |  61
-rw-r--r-- | lib/Transforms/Scalar/ScalarReplAggregates.cpp  |  32
-rw-r--r-- | lib/Transforms/Scalar/SimplifyLibCalls.cpp      |  29
5 files changed, 180 insertions, 138 deletions
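Editor's note: every file below is touched for the same underlying reason. The llvm.memcpy, llvm.memmove, and llvm.memset intrinsics now carry an explicit i1 isVolatile operand and are overloaded on their pointer operand types (so address spaces survive) as well as on their length type. As a hedged sketch of the calling convention the hunks converge on (variable names M, Ptr, Val, Len, Align, InsertPt, and Context are placeholders, not code from the patch):

    // Overload llvm.memset on the destination pointer type and the size type,
    // then pass five operands; the trailing i1 false means "not volatile".
    const Type *Tys[] = { Ptr->getType(), Len->getType() };
    Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
    Value *Ops[] = {
      Ptr,                                                 // dest
      Val,                                                 // byte value to store
      Len,                                                 // number of bytes
      ConstantInt::get(Type::getInt32Ty(Context), Align),  // alignment
      ConstantInt::get(Type::getInt1Ty(Context), 0)        // isVolatile
    };
    CallInst::Create(MemSetF, Ops, Ops + 5, "", InsertPt);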
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 27fd2ef..3918738 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -231,8 +231,7 @@ bool LoopUnswitch::processCurrentLoop() {
   // block that is branching on a loop-invariant condition, we can unswitch this
   // loop.
   for (Loop::block_iterator I = currentLoop->block_begin(),
-         E = currentLoop->block_end();
-       I != E; ++I) {
+         E = currentLoop->block_end(); I != E; ++I) {
     TerminatorInst *TI = (*I)->getTerminator();
     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
       // If this isn't branching on an invariant condition, we can't unswitch
@@ -474,7 +473,6 @@ static inline void RemapInstruction(Instruction *I,
 static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM,
                        LoopInfo *LI, LPPassManager *LPM) {
   Loop *New = new Loop();
-
   LPM->insertLoop(New, PL);
 
   // Add all of the blocks in L to the new loop.
@@ -565,8 +563,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
 /// SplitExitEdges - Split all of the edges from inside the loop to their exit
 /// blocks.  Update the appropriate Phi nodes as we do so.
 void LoopUnswitch::SplitExitEdges(Loop *L,
-                                  const SmallVector<BasicBlock *, 8> &ExitBlocks)
-{
+                                const SmallVector<BasicBlock *, 8> &ExitBlocks){
   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
     BasicBlock *ExitBlock = ExitBlocks[i];
 
@@ -619,15 +616,15 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
   NewBlocks.reserve(LoopBlocks.size());
   DenseMap<const Value*, Value*> ValueMap;
   for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
-    BasicBlock *New = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
-    NewBlocks.push_back(New);
-    ValueMap[LoopBlocks[i]] = New;  // Keep the BB mapping.
-    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], New, L);
+    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
+    NewBlocks.push_back(NewBB);
+    ValueMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
+    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
   }
 
   // Splice the newly inserted blocks into the function right before the
   // original preheader.
-  F->getBasicBlockList().splice(LoopBlocks[0], F->getBasicBlockList(),
+  F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
                                 NewBlocks[0], F->end());
 
   // Now we create the new Loop object for the versioned loop.
@@ -652,8 +649,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
       // If the successor of the exit block had PHI nodes, add an entry for
       // NewExit.
       PHINode *PN;
-      for (BasicBlock::iterator I = ExitSucc->begin();
-           (PN = dyn_cast<PHINode>(I)); ++I) {
+      for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
+        PN = cast<PHINode>(I);
         Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
         DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V);
         if (It != ValueMap.end()) V = It->second;
@@ -682,7 +679,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
 
   // Now we rewrite the original code to know that the condition is true and the
   // new code to know that the condition is false.
-  RewriteLoopBodyWithConditionConstant(L , LIC, Val, false);
+  RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
 
   // It's possible that simplifying one loop could cause the other to be
   // deleted.  If so, don't simplify it.
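Editor's note: one detail worth calling out in the -652 hunk above is that the PHI walk no longer assigns inside the loop condition; it tests isa<PHINode> and casts in the body. Since LLVM keeps all PHI nodes grouped at the top of a basic block, stopping at the first non-PHI still visits every PHI. A minimal sketch of the idiom, with ExitSucc standing in for whichever block is being walked:

    // Iterate the leading PHI nodes of a block; stop at the first non-PHI.
    for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
      PHINode *PN = cast<PHINode>(I);  // safe: isa<> already checked the type
      // ... rewrite PN's incoming value for the split exit edge ...
    }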
@@ -884,65 +881,66 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
       U->replaceUsesOfWith(LIC, Replacement);
       Worklist.push_back(U);
     }
-  } else {
-    // Otherwise, we don't know the precise value of LIC, but we do know that it
-    // is certainly NOT "Val".  As such, simplify any uses in the loop that we
-    // can.  This case occurs when we unswitch switch statements.
-    for (unsigned i = 0, e = Users.size(); i != e; ++i)
-      if (Instruction *U = cast<Instruction>(Users[i])) {
-        if (!L->contains(U))
-          continue;
+    SimplifyCode(Worklist, L);
+    return;
+  }
+
+  // Otherwise, we don't know the precise value of LIC, but we do know that it
+  // is certainly NOT "Val".  As such, simplify any uses in the loop that we
+  // can.  This case occurs when we unswitch switch statements.
+  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
+    Instruction *U = cast<Instruction>(Users[i]);
+    if (!L->contains(U))
+      continue;
 
-        Worklist.push_back(U);
+    Worklist.push_back(U);
 
-        // If we know that LIC is not Val, use this info to simplify code.
-        if (SwitchInst *SI = dyn_cast<SwitchInst>(U)) {
-          for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
-            if (SI->getCaseValue(i) == Val) {
-              // Found a dead case value.  Don't remove PHI nodes in the
-              // successor if they become single-entry, those PHI nodes may
-              // be in the Users list.
-
-              // FIXME: This is a hack.  We need to keep the successor around
-              // and hooked up so as to preserve the loop structure, because
-              // trying to update it is complicated.  So instead we preserve the
-              // loop structure and put the block on a dead code path.
-              BasicBlock *Switch = SI->getParent();
-              SplitEdge(Switch, SI->getSuccessor(i), this);
-              // Compute the successors instead of relying on the return value
-              // of SplitEdge, since it may have split the switch successor
-              // after PHI nodes.
-              BasicBlock *NewSISucc = SI->getSuccessor(i);
-              BasicBlock *OldSISucc = *succ_begin(NewSISucc);
-              // Create an "unreachable" destination.
-              BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
-                                                     Switch->getParent(),
-                                                     OldSISucc);
-              new UnreachableInst(Context, Abort);
-              // Force the new case destination to branch to the "unreachable"
-              // block while maintaining a (dead) CFG edge to the old block.
-              NewSISucc->getTerminator()->eraseFromParent();
-              BranchInst::Create(Abort, OldSISucc,
-                                 ConstantInt::getTrue(Context), NewSISucc);
-              // Release the PHI operands for this edge.
-              for (BasicBlock::iterator II = NewSISucc->begin();
-                   PHINode *PN = dyn_cast<PHINode>(II); ++II)
-                PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
-                                     UndefValue::get(PN->getType()));
-              // Tell the domtree about the new block. We don't fully update the
-              // domtree here -- instead we force it to do a full recomputation
-              // after the pass is complete -- but we do need to inform it of
-              // new blocks.
-              if (DT)
-                DT->addNewBlock(Abort, NewSISucc);
-              break;
-            }
-          }
-        }
+    // TODO: We could do other simplifications, for example, turning
+    // 'icmp eq LIC, Val' -> false.
+
+    // If we know that LIC is not Val, use this info to simplify code.
+    SwitchInst *SI = dyn_cast<SwitchInst>(U);
+    if (SI == 0 || !isa<ConstantInt>(Val)) continue;
+
+    unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+    if (DeadCase == 0) continue;  // Default case is live for multiple values.
+
+    // Found a dead case value.  Don't remove PHI nodes in the
+    // successor if they become single-entry, those PHI nodes may
+    // be in the Users list.
-    // TODO: We could do other simplifications, for example, turning
-    // LIC == Val -> false.
-  }
+    // FIXME: This is a hack.  We need to keep the successor around
+    // and hooked up so as to preserve the loop structure, because
+    // trying to update it is complicated.  So instead we preserve the
+    // loop structure and put the block on a dead code path.
+    BasicBlock *Switch = SI->getParent();
+    SplitEdge(Switch, SI->getSuccessor(DeadCase), this);
+    // Compute the successors instead of relying on the return value
+    // of SplitEdge, since it may have split the switch successor
+    // after PHI nodes.
+    BasicBlock *NewSISucc = SI->getSuccessor(DeadCase);
+    BasicBlock *OldSISucc = *succ_begin(NewSISucc);
+    // Create an "unreachable" destination.
+    BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
+                                           Switch->getParent(),
+                                           OldSISucc);
+    new UnreachableInst(Context, Abort);
+    // Force the new case destination to branch to the "unreachable"
+    // block while maintaining a (dead) CFG edge to the old block.
+    NewSISucc->getTerminator()->eraseFromParent();
+    BranchInst::Create(Abort, OldSISucc,
+                       ConstantInt::getTrue(Context), NewSISucc);
+    // Release the PHI operands for this edge.
+    for (BasicBlock::iterator II = NewSISucc->begin();
+         PHINode *PN = dyn_cast<PHINode>(II); ++II)
+      PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
+                           UndefValue::get(PN->getType()));
+    // Tell the domtree about the new block. We don't fully update the
+    // domtree here -- instead we force it to do a full recomputation
+    // after the pass is complete -- but we do need to inform it of
+    // new blocks.
+    if (DT)
+      DT->addNewBlock(Abort, NewSISucc);
   }
 
   SimplifyCode(Worklist, L);
@@ -1054,7 +1052,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
         LPM->deleteSimpleAnalysisValue(Succ, L);
         Succ->eraseFromParent();
         ++NumSimplify;
-      } else if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
+        break;
+      }
+
+      if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
         // Conditional branch.  Turn it into an unconditional branch, then
         // remove dead blocks.
         break;  // FIXME: Enable.
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 62e2977..3b305ae 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -413,7 +413,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     // interesting as a small compile-time optimization.
     Ranges.addStore(0, SI);
 
-  Function *MemSetF = 0;
 
   // Now that we have full information about ranges, loop over the ranges and
   // emit memset's for anything big enough to be worthwhile.
@@ -433,29 +432,40 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
    // memset block.  This ensure that the memset is dominated by any addressing
    // instruction needed by the start of the block.
    BasicBlock::iterator InsertPt = BI;
-
-    if (MemSetF == 0) {
-      const Type *Ty = Type::getInt64Ty(Context);
-      MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1);
-    }
-
+
     // Get the starting pointer of the block.
     StartPtr = Range.StartPtr;
-
+
+    // Determine alignment
+    unsigned Alignment = Range.Alignment;
+    if (Alignment == 0) {
+      const Type *EltType =
+        cast<PointerType>(StartPtr->getType())->getElementType();
+      Alignment = TD->getABITypeAlignment(EltType);
+    }
+
     // Cast the start ptr to be i8* as memset requires.
-    const Type *i8Ptr = Type::getInt8PtrTy(Context);
-    if (StartPtr->getType() != i8Ptr)
+    const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
+    const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
+                                            StartPTy->getAddressSpace());
+    if (StartPTy!= i8Ptr)
       StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
                                  InsertPt);
-
+
     Value *Ops[] = {
       StartPtr, ByteVal,  // Start, value
       // size
       ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
       // align
-      ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment)
+      ConstantInt::get(Type::getInt32Ty(Context), Alignment),
+      // volatile
+      ConstantInt::get(Type::getInt1Ty(Context), 0),
     };
-    Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
+    const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
+
+    Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+
+    Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
 
     DEBUG(dbgs() << "Replace stores:\n";
           for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            dbgs() << *Range.TheStores[i];
@@ -680,16 +690,19 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
     return false;
 
   // If all checks passed, then we can transform these memcpy's
-  const Type *Ty = M->getLength()->getType();
+  const Type *ArgTys[3] = { M->getRawDest()->getType(),
+                            MDep->getRawSource()->getType(),
+                            M->getLength()->getType() };
   Function *MemCpyFun = Intrinsic::getDeclaration(
                                  M->getParent()->getParent()->getParent(),
-                                 M->getIntrinsicID(), &Ty, 1);
+                                 M->getIntrinsicID(), ArgTys, 3);
 
-  Value *Args[4] = {
-    M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
+  Value *Args[5] = {
+    M->getRawDest(), MDep->getRawSource(), M->getLength(),
+    M->getAlignmentCst(), M->getVolatileCst()
   };
-  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
+  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
 
 
   // If C and M don't interfere, then this is a valid transformation. If they
@@ -728,8 +741,10 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
 
   // If not, then we know we can transform this.
   Module *Mod = M->getParent()->getParent()->getParent();
-  const Type *Ty = M->getLength()->getType();
-  M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1));
+  const Type *ArgTys[3] = { M->getRawDest()->getType(),
+                            M->getRawSource()->getType(),
+                            M->getLength()->getType() };
+  M->setOperand(0,Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3));
 
   // MemDep may have over conservative information about this instruction, just
   // conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 546b7b6..4f09bee 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1521,45 +1521,48 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
       }
     }
 
+    // Check to see if we have a branch or switch on an undefined value.  If so
+    // we force the branch to go one way or the other to make the successor
+    // values live.  It doesn't really matter which way we force it.
     TerminatorInst *TI = BB->getTerminator();
     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
       if (!BI->isConditional()) continue;
       if (!getValueState(BI->getCondition()).isUndefined()) continue;
-    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+
+      // If the input to SCCP is actually branch on undef, fix the undef to
+      // false.
+      if (isa<UndefValue>(BI->getCondition())) {
+        BI->setCondition(ConstantInt::getFalse(BI->getContext()));
+        markEdgeExecutable(BB, TI->getSuccessor(1));
+        return true;
+      }
+
+      // Otherwise, it is a branch on a symbolic value which is currently
+      // considered to be undef.  Handle this by forcing the input value to the
+      // branch to false.
+      markForcedConstant(BI->getCondition(),
+                         ConstantInt::getFalse(TI->getContext()));
+      return true;
+    }
+
+    if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
       if (SI->getNumSuccessors() < 2)   // no cases
         continue;
       if (!getValueState(SI->getCondition()).isUndefined())
         continue;
-    } else {
-      continue;
-    }
-
-    // If the edge to the second successor isn't thought to be feasible yet,
-    // mark it so now.  We pick the second one so that this goes to some
-    // enumerated value in a switch instead of going to the default destination.
-    if (KnownFeasibleEdges.count(Edge(BB, TI->getSuccessor(1))))
-      continue;
-
-    // Otherwise, it isn't already thought to be feasible.  Mark it as such now
-    // and return.  This will make other blocks reachable, which will allow new
-    // values to be discovered and existing ones to be moved in the lattice.
-    markEdgeExecutable(BB, TI->getSuccessor(1));
-
-    // This must be a conditional branch of switch on undef.  At this point,
-    // force the old terminator to branch to the first successor.  This is
-    // required because we are now influencing the dataflow of the function with
-    // the assumption that this edge is taken.  If we leave the branch condition
-    // as undef, then further analysis could think the undef went another way
-    // leading to an inconsistent set of conclusions.
-    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-      BI->setCondition(ConstantInt::getFalse(BI->getContext()));
-    } else {
-      SwitchInst *SI = cast<SwitchInst>(TI);
-      SI->setCondition(SI->getCaseValue(1));
+
+      // If the input to SCCP is actually switch on undef, fix the undef to
+      // the first constant.
+      if (isa<UndefValue>(SI->getCondition())) {
+        SI->setCondition(SI->getCaseValue(1));
+        markEdgeExecutable(BB, TI->getSuccessor(1));
+        return true;
+      }
+
+      markForcedConstant(SI->getCondition(), SI->getCaseValue(1));
+      return true;
     }
-
-    return true;
   }
 
   return false;
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index bbe6270..6211beb 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -858,8 +858,17 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
       EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
 
     // Cast the other pointer (if we have one) to BytePtrTy.
-    if (OtherElt && OtherElt->getType() != BytePtrTy)
-      OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI);
+    if (OtherElt && OtherElt->getType() != BytePtrTy) {
+      // Preserve address space of OtherElt
+      const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
+      const PointerType* PTy = cast<PointerType>(BytePtrTy);
+      if (OtherPTy->getElementType() != PTy->getElementType()) {
+        Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
+                                             OtherPTy->getAddressSpace());
+        OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
+                                   OtherElt->getNameStr(), MI);
+      }
+    }
 
     unsigned EltSize = TD->getTypeAllocSize(EltTy);
 
@@ -870,17 +879,28 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
        SROADest ? OtherElt : EltPtr,  // Src ptr
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
        // Align
-        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
+        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
+        MI->getVolatileCst()
       };
-      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+      // In case we fold the address space overloaded memcpy of A to B
+      // with memcpy of B to C, change the function to be a memcpy of A to C.
+      const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
+                            Ops[2]->getType() };
+      Module *M = MI->getParent()->getParent()->getParent();
+      TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
+      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
     } else {
       assert(isa<MemSetInst>(MI));
       Value *Ops[] = {
         EltPtr, MI->getOperand(2),  // Dest, Value,
         ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
-        Zero  // Align
+        Zero,  // Align
+        ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
       };
-      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+      const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
+      Module *M = MI->getParent()->getParent()->getParent();
+      TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
     }
   }
 
   DeadInsts.push_back(MI);
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 5941ea6..b053cfc 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -142,7 +142,8 @@ struct StrCatOpt : public LibCallOptimization {
     // We have enough information to now generate the memcpy call to do the
     // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
     EmitMemCpy(CpyDst, Src,
-               ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B, TD);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len+1),
+               1, false, B, TD);
   }
 };
 
@@ -383,7 +384,8 @@ struct StrCpyOpt : public LibCallOptimization {
                     CI->getOperand(3), B, TD);
     else
       EmitMemCpy(Dst, Src,
-                 ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
+                 ConstantInt::get(TD->getIntPtrType(*Context), Len),
+                 1, false, B, TD);
     return Dst;
   }
 };
@@ -411,8 +413,8 @@ struct StrNCpyOpt : public LibCallOptimization {
 
     if (SrcLen == 0) {
       // strncpy(x, "", y) -> memset(x, '\0', y, 1)
-      EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp,
-                 B, TD);
+      EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'),
+                 LenOp, false, B, TD);
       return Dst;
     }
 
@@ -432,7 +434,8 @@ struct StrNCpyOpt : public LibCallOptimization {
 
     // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
     EmitMemCpy(Dst, Src,
-               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len),
+               1, false, B, TD);
     return Dst;
   }
 
@@ -593,7 +596,7 @@ struct MemCpyOpt : public LibCallOptimization {
 
     // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
     EmitMemCpy(CI->getOperand(1), CI->getOperand(2),
-               CI->getOperand(3), 1, B, TD);
+               CI->getOperand(3), 1, false, B, TD);
     return CI->getOperand(1);
   }
 };
@@ -615,7 +618,7 @@ struct MemMoveOpt : public LibCallOptimization {
 
     // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
     EmitMemMove(CI->getOperand(1), CI->getOperand(2),
-                CI->getOperand(3), 1, B, TD);
+                CI->getOperand(3), 1, false, B, TD);
     return CI->getOperand(1);
   }
 };
@@ -637,8 +640,8 @@ struct MemSetOpt : public LibCallOptimization {
 
     // memset(p, v, n) -> llvm.memset(p, v, n, 1)
     Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
-                                 false);
-    EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD);
+                                false);
+    EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD);
     return CI->getOperand(1);
   }
 };
@@ -999,7 +1002,7 @@ struct SPrintFOpt : public LibCallOptimization {
       // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
       EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
                  ConstantInt::get(TD->getIntPtrType(*Context),
-                 FormatStr.size()+1), 1, B, TD);
+                 FormatStr.size()+1), 1, false, B, TD);
       return ConstantInt::get(CI->getType(), FormatStr.size());
     }
 
@@ -1013,11 +1016,11 @@ struct SPrintFOpt : public LibCallOptimization {
     // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
     if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
     Value *V = B.CreateTrunc(CI->getOperand(3),
-                             Type::getInt8Ty(*Context), "char");
+                            Type::getInt8Ty(*Context), "char");
     Value *Ptr = CastToCStr(CI->getOperand(1), B);
     B.CreateStore(V, Ptr);
     Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
-                      "nul");
+                     "nul");
     B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);
 
     return ConstantInt::get(CI->getType(), 1);
@@ -1034,7 +1037,7 @@ struct SPrintFOpt : public LibCallOptimization {
     Value *IncLen = B.CreateAdd(Len,
                                 ConstantInt::get(Len->getType(), 1),
                                 "leninc");
-    EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B, TD);
+    EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD);
 
     // The sprintf result is the unincremented number of bytes in the string.
     return B.CreateIntCast(Len, CI->getType(), false);
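Editor's note: the SimplifyLibCalls hunks are mechanical. Every EmitMemCpy, EmitMemMove, and EmitMemSet caller threads the new isVolatile flag through, always as false, since the libc calls being replaced are never volatile accesses. Assuming the helper declarations (which live outside this diff) gained the parameter in the same position the call sites imply, a typical call now reads:

    // Copy Len+1 bytes (including the nul) with alignment 1, non-volatile.
    EmitMemCpy(CpyDst, Src,
               ConstantInt::get(TD->getIntPtrType(*Context), Len+1),
               1 /*Align*/, false /*isVolatile*/, B, TD);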