Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
38 files changed, 3287 insertions, 2499 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp new file mode 100644 index 0000000..cce016a --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -0,0 +1,114 @@ +//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h). +// +//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +namespace llvm { + +// We sort the stack variables by alignment (largest first) to minimize +// unnecessary large gaps due to alignment. +// It is tempting to also sort variables by size so that larger variables +// have larger redzones at both ends. But reordering will make report analysis +// harder, especially when temporary unnamed variables are present. +// So, until we can provide more information (type, line number, etc) +// for the stack variables we avoid reordering them too much. +static inline bool CompareVars(const ASanStackVariableDescription &a, + const ASanStackVariableDescription &b) { + return a.Alignment > b.Alignment; +} + +// We also force minimal alignment for all vars to kMinAlignment so that vars +// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars. +static const size_t kMinAlignment = 16; + +static size_t RoundUpTo(size_t X, size_t RoundTo) { + assert((RoundTo & (RoundTo - 1)) == 0); + return (X + RoundTo - 1) & ~(RoundTo - 1); +} + +// The larger the variable Size, the larger the redzone. +// The resulting frame size is a multiple of Alignment.
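+// (Editorial worked example, not part of the original commit: for Size = 40
+// and Alignment = 32, the "Size <= 128" bucket below gives Res = 40 + 32 = 72,
+// and RoundUpTo(72, 32) yields 96.)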
+static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) { + size_t Res = 0; + if (Size <= 4) Res = 16; + else if (Size <= 16) Res = 32; + else if (Size <= 128) Res = Size + 32; + else if (Size <= 512) Res = Size + 64; + else if (Size <= 4096) Res = Size + 128; + else Res = Size + 256; + return RoundUpTo(Res, Alignment); +} + +void +ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars, + size_t Granularity, size_t MinHeaderSize, + ASanStackFrameLayout *Layout) { + assert(Granularity >= 8 && Granularity <= 64 && + (Granularity & (Granularity - 1)) == 0); + assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 && + MinHeaderSize >= Granularity); + size_t NumVars = Vars.size(); + assert(NumVars > 0); + for (size_t i = 0; i < NumVars; i++) + Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment); + + std::stable_sort(Vars.begin(), Vars.end(), CompareVars); + SmallString<2048> StackDescriptionStorage; + raw_svector_ostream StackDescription(StackDescriptionStorage); + StackDescription << NumVars; + Layout->FrameAlignment = std::max(Granularity, Vars[0].Alignment); + SmallVector<uint8_t, 64> &SB(Layout->ShadowBytes); + SB.clear(); + size_t Offset = std::max(std::max(MinHeaderSize, Granularity), + Vars[0].Alignment); + assert((Offset % Granularity) == 0); + SB.insert(SB.end(), Offset / Granularity, kAsanStackLeftRedzoneMagic); + for (size_t i = 0; i < NumVars; i++) { + bool IsLast = i == NumVars - 1; + size_t Alignment = std::max(Granularity, Vars[i].Alignment); + (void)Alignment; // Used only in asserts. + size_t Size = Vars[i].Size; + const char *Name = Vars[i].Name; + assert((Alignment & (Alignment - 1)) == 0); + assert(Layout->FrameAlignment >= Alignment); + assert((Offset % Alignment) == 0); + assert(Size > 0); + StackDescription << " " << Offset << " " << Size << " " << strlen(Name) + << " " << Name; + size_t NextAlignment = IsLast ? Granularity + : std::max(Granularity, Vars[i + 1].Alignment); + size_t SizeWithRedzone = VarAndRedzoneSize(Vars[i].Size, NextAlignment); + SB.insert(SB.end(), Size / Granularity, 0); + if (Size % Granularity) + SB.insert(SB.end(), Size % Granularity); + SB.insert(SB.end(), (SizeWithRedzone - Size) / Granularity, + IsLast ? kAsanStackRightRedzoneMagic + : kAsanStackMidRedzoneMagic); + Vars[i].Offset = Offset; + Offset += SizeWithRedzone; + } + if (Offset % MinHeaderSize) { + size_t ExtraRedzone = MinHeaderSize - (Offset % MinHeaderSize); + SB.insert(SB.end(), ExtraRedzone / Granularity, + kAsanStackRightRedzoneMagic); + Offset += ExtraRedzone; + } + Layout->DescriptionString = StackDescription.str(); + Layout->FrameSize = Offset; + assert((Layout->FrameSize % MinHeaderSize) == 0); + assert(Layout->FrameSize / Granularity == Layout->ShadowBytes.size()); +} + +} // llvm namespace diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp new file mode 100644 index 0000000..196ac79 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp @@ -0,0 +1,222 @@ +//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file adds DWARF discriminators to the IR. 
Path discriminators are +// used to decide what CFG path was taken inside sub-graphs whose instructions +// share the same line and column number information. +// +// The main user of this is the sample profiler. Instruction samples are +// mapped to line number information. Since a single line may be spread +// out over several basic blocks, discriminators add a more precise location +// for the samples. +// +// For example, +// +// 1 #define ASSERT(P) +// 2 if (!(P)) +// 3 abort() +// ... +// 100 while (true) { +// 101 ASSERT (sum < 0); +// 102 ... +// 130 } +// +// when converted to IR, this snippet looks something like: +// +// while.body: ; preds = %entry, %if.end +// %0 = load i32* %sum, align 4, !dbg !15 +// %cmp = icmp slt i32 %0, 0, !dbg !15 +// br i1 %cmp, label %if.end, label %if.then, !dbg !15 +// +// if.then: ; preds = %while.body +// call void @abort(), !dbg !15 +// br label %if.end, !dbg !15 +// +// Notice that all the instructions in blocks 'while.body' and 'if.then' +// have exactly the same debug information. When this program is sampled +// at runtime, the profiler will assume that all these instructions are +// equally frequent. This, in turn, will make it consider the edge +// while.body->if.then to be frequently taken (which is incorrect). +// +// By adding a discriminator value to the instructions in block 'if.then', +// we can distinguish instructions at line 101 with discriminator 0 from +// the instructions at line 101 with discriminator 1. +// +// For more details about DWARF discriminators, please visit +// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "add-discriminators" + +namespace { + struct AddDiscriminators : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + AddDiscriminators() : FunctionPass(ID) { + initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + }; +} + +char AddDiscriminators::ID = 0; +INITIALIZE_PASS_BEGIN(AddDiscriminators, "add-discriminators", + "Add DWARF path discriminators", false, false) +INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators", + "Add DWARF path discriminators", false, false) + +// Command line option to disable discriminator generation even in the +// presence of debug information. This is only needed when debugging +// debug info generation issues. +static cl::opt<bool> +NoDiscriminators("no-discriminators", cl::init(false), + cl::desc("Disable generation of discriminator information.")); + +FunctionPass *llvm::createAddDiscriminatorsPass() { + return new AddDiscriminators(); +} + +static bool hasDebugInfo(const Function &F) { + NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu"); + return CUNodes != nullptr; +} + +/// \brief Assign DWARF discriminators. +/// +/// To assign discriminators, we examine the boundaries of every +/// basic block and its successors. Suppose there is a basic block B1 +/// with successor B2.
The last instruction I1 in B1 and the first +/// instruction I2 in B2 are located at the same file and line number. +/// This situation is illustrated in the following code snippet: +/// +/// if (i < 10) x = i; +/// +/// entry: +/// br i1 %cmp, label %if.then, label %if.end, !dbg !10 +/// if.then: +/// %1 = load i32* %i.addr, align 4, !dbg !10 +/// store i32 %1, i32* %x, align 4, !dbg !10 +/// br label %if.end, !dbg !10 +/// if.end: +/// ret void, !dbg !12 +/// +/// Notice how the branch instruction in block 'entry' and all the +/// instructions in block 'if.then' have the exact same debug location +/// information (!dbg !10). +/// +/// To distinguish instructions in block 'entry' from instructions in +/// block 'if.then', we generate a new lexical block for all the +/// instructions in block 'if.then' that share the same file and line +/// location with the last instruction of block 'entry'. +/// +/// This new lexical block will have the same location information as +/// the previous one, but with a new DWARF discriminator value. +/// +/// One of the main uses of this discriminator value is in runtime +/// sample profilers. It allows the profiler to distinguish instructions +/// at location !dbg !10 that execute on different basic blocks. This is +/// important because while the predicate 'if (i < 10)' may have been +/// executed millions of times, the assignment 'x = i' may have only +/// executed a handful of times (meaning that the entry->if.then edge is +/// seldom taken). +/// +/// If we did not have discriminator information, the profiler would +/// assign the same weight to both blocks 'entry' and 'if.then', which +/// in turn will make it conclude that the entry->if.then edge is very +/// hot. +/// +/// To decide where to create new discriminator values, this function +/// traverses the CFG and examines instructions at basic block boundaries. +/// If the last instruction I1 of a block B1 is at the same file and line +/// location as instruction I2 of successor B2, then it creates a new +/// lexical block for I2 and all the instructions in B2 that share the same +/// file and line location as I2. This new lexical block will have a +/// different discriminator number than I1. +bool AddDiscriminators::runOnFunction(Function &F) { + // If the function has debug information, but the user has disabled + // discriminators, do nothing. + // Similarly, if the function has no debug info, do nothing. + // Finally, if this module is built with dwarf versions earlier than 4, + // do nothing (discriminator support is a DWARF 4 feature). + if (NoDiscriminators || + !hasDebugInfo(F) || + F.getParent()->getDwarfVersion() < 4) + return false; + + bool Changed = false; + Module *M = F.getParent(); + LLVMContext &Ctx = M->getContext(); + DIBuilder Builder(*M); + + // Traverse all the blocks looking for instructions in different + // blocks that are at the same file:line location.
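+ // (Editorial note, not in the original commit: the walk below looks only at
+ // each successor's first non-PHI/dbg/lifetime instruction; when it shares
+ // the terminator's location, it and every following instruction with that
+ // same location are retagged with the new discriminator.)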
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BasicBlock *B = I; + TerminatorInst *Last = B->getTerminator(); + DebugLoc LastLoc = Last->getDebugLoc(); + if (LastLoc.isUnknown()) continue; + DILocation LastDIL(LastLoc.getAsMDNode(Ctx)); + + for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) { + BasicBlock *Succ = Last->getSuccessor(I); + Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime(); + DebugLoc FirstLoc = First->getDebugLoc(); + if (FirstLoc.isUnknown()) continue; + DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx)); + + // If the first instruction (First) of Succ is at the same file + // location as B's last instruction (Last), add a new + // discriminator for First's location and all the instructions + // in Succ that share the same location with First. + if (FirstDIL.atSameLineAs(LastDIL)) { + // Create a new lexical scope and compute a new discriminator + // number for it. + StringRef Filename = FirstDIL.getFilename(); + unsigned LineNumber = FirstDIL.getLineNumber(); + unsigned ColumnNumber = FirstDIL.getColumnNumber(); + DIScope Scope = FirstDIL.getScope(); + DIFile File = Builder.createFile(Filename, Scope.getDirectory()); + unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx); + DILexicalBlock NewScope = Builder.createLexicalBlock( + Scope, File, LineNumber, ColumnNumber, Discriminator); + DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope); + DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL); + + // Attach this new debug location to First and every + // instruction following First that shares the same location. + for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1; + ++I1) { + if (I1->getDebugLoc() != FirstLoc) break; + I1->setDebugLoc(newDebugLoc); + DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber() + << ":" << NewDIL.getColumnNumber() << ":" + << NewDIL.getDiscriminator() << *I1 << "\n"); + } + DEBUG(dbgs() << "\n"); + Changed = true; + } + } + } + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 12de9ee..602e8ba 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -15,17 +15,17 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Type.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -68,8 +68,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { if (!isa<PHINode>(BB->begin())) return; - AliasAnalysis *AA = 0; - MemoryDependenceAnalysis *MemDep = 0; + AliasAnalysis *AA = nullptr; + MemoryDependenceAnalysis *MemDep = nullptr; if (P) { AA = P->getAnalysisIfAvailable<AliasAnalysis>(); MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>(); @@ -130,7 +130,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { BasicBlock *OnlySucc = BB; for (; SI != SE; ++SI) if 
(*SI != OnlySucc) { - OnlySucc = 0; // There are multiple distinct successors! + OnlySucc = nullptr; // There are multiple distinct successors! break; } @@ -167,15 +167,17 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { // Finally, erase the old block and update dominator info. if (P) { - if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { - if (DomTreeNode *DTN = DT->getNode(BB)) { - DomTreeNode *PredDTN = DT->getNode(PredBB); + if (DominatorTreeWrapperPass *DTWP = + P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { + DominatorTree &DT = DTWP->getDomTree(); + if (DomTreeNode *DTN = DT.getNode(BB)) { + DomTreeNode *PredDTN = DT.getNode(PredBB); SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end()); for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(), DE = Children.end(); DI != DE; ++DI) - DT->changeImmediateDominator(*DI, PredDTN); + DT.changeImmediateDominator(*DI, PredDTN); - DT->eraseNode(BB); + DT.eraseNode(BB); } if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) @@ -215,7 +217,7 @@ void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL, /// void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I) { - assert(I->getParent() == 0 && + assert(I->getParent() == nullptr && "ReplaceInstWithInst: Instruction already inserted into basic block!"); // Insert the new instruction into the basic block... @@ -252,7 +254,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { // If the successor only has a single pred, split the top of the successor // block. assert(SP == BB && "CFG broken"); - SP = NULL; + SP = nullptr; return SplitBlock(Succ, Succ->begin(), P); } @@ -280,18 +282,20 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { if (Loop *L = LI->getLoopFor(Old)) L->addBasicBlockToLoop(New, LI->getBase()); - if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + if (DominatorTreeWrapperPass *DTWP = + P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { + DominatorTree &DT = DTWP->getDomTree(); // Old dominates New. New node dominates all other nodes dominated by Old. - if (DomTreeNode *OldNode = DT->getNode(Old)) { + if (DomTreeNode *OldNode = DT.getNode(Old)) { std::vector<DomTreeNode *> Children; for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); I != E; ++I) Children.push_back(*I); - DomTreeNode *NewNode = DT->addNewBlock(New,Old); + DomTreeNode *NewNode = DT.addNewBlock(New, Old); for (std::vector<DomTreeNode *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) - DT->changeImmediateDominator(*I, NewNode); + DT.changeImmediateDominator(*I, NewNode); } } @@ -306,7 +310,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, if (!P) return; LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); - Loop *L = LI ? LI->getLoopFor(OldBB) : 0; + Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr; // If we need to preserve loop analyses, collect some information about how // this split will affect loops. @@ -336,9 +340,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, } // Update dominator tree if available. 
- DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); - if (DT) - DT->splitBlock(NewBB); + if (DominatorTreeWrapperPass *DTWP = + P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DTWP->getDomTree().splitBlock(NewBB); if (!L) return; @@ -347,7 +351,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, // loop). To find this, examine each of the predecessors and determine which // loops enclose them, and select the most-nested loop which contains the // loop containing the block being split. - Loop *InnermostPredLoop = 0; + Loop *InnermostPredLoop = nullptr; for (ArrayRef<BasicBlock*>::iterator i = Preds.begin(), e = Preds.end(); i != e; ++i) { BasicBlock *Pred = *i; @@ -380,51 +384,68 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock*> Preds, BranchInst *BI, Pass *P, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. - AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; + AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : nullptr; + SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. - Value *InVal = 0; + Value *InVal = nullptr; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); - for (unsigned i = 1, e = Preds.size(); i != e; ++i) - if (InVal != PN->getIncomingValueForBlock(Preds[i])) { - InVal = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (!PredSet.count(PN->getIncomingBlock(i))) + continue; + if (!InVal) + InVal = PN->getIncomingValue(i); + else if (InVal != PN->getIncomingValue(i)) { + InVal = nullptr; break; } + } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - // Explicitly check the BB index here to handle duplicates in Preds. - int Idx = PN->getBasicBlockIndex(Preds[i]); - if (Idx >= 0) - PN->removeIncomingValue(Idx, false); - } - } else { - // If the values coming into the block are not the same, we need a PHI. - // Create the new PHI node, insert it into NewBB at the end of the block - PHINode *NewPHI = - PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); - if (AA) AA->copyValue(PN, NewPHI); - // Move all of the PHI values for 'Preds' to the new PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - Value *V = PN->removeIncomingValue(Preds[i], false); - NewPHI->addIncoming(V, Preds[i]); - } + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values + // aren't invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) + if (PredSet.count(PN->getIncomingBlock(i))) + PN->removeIncomingValue(i, false); + + // Add an incoming value to the PHI node in the loop for the preheader + // edge. + PN->addIncoming(InVal, NewBB); + continue; + } - InVal = NewPHI; + // If the values coming into the block are not the same, we need a new + // PHI. 
+ // Create the new PHI node, insert it into NewBB at the end of the block + PHINode *NewPHI = + PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); + if (AA) + AA->copyValue(PN, NewPHI); + + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values aren't + // invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) { + BasicBlock *IncomingBB = PN->getIncomingBlock(i); + if (PredSet.count(IncomingBB)) { + Value *V = PN->removeIncomingValue(i, false); + NewPHI->addIncoming(V, IncomingBB); + } } - // Add an incoming value to the PHI node in the loop for the preheader - // edge. - PN->addIncoming(InVal, NewBB); + PN->addIncoming(NewPHI, NewBB); } } @@ -538,7 +559,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, e = pred_end(OrigBB); } - BasicBlock *NewBB2 = 0; + BasicBlock *NewBB2 = nullptr; if (!NewBB2Preds.empty()) { // Create another basic block for the rest of OrigBB's predecessors. NewBB2 = BasicBlock::Create(OrigBB->getContext(), @@ -603,7 +624,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); i != e; ++i) { Value *V = *i; - Instruction *NewBC = 0; + Instruction *NewBC = nullptr; if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { // Return value might be bitcasted. Clone and insert it before the // return instruction. @@ -630,28 +651,30 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, } /// SplitBlockAndInsertIfThen - Split the containing block at the -/// specified instruction - everything before and including SplitBefore stays -/// in the old basic block, and everything after SplitBefore is moved to a +/// specified instruction - everything before and including SplitBefore stays +/// in the old basic block, and everything after SplitBefore is moved to a /// new block. The two blocks are connected by a conditional branch /// (with value of Cond being the condition). /// Before: /// Head -/// Cmp +/// SplitBefore /// Tail /// After: /// Head -/// Cmp -/// if (Cmp) +/// if (Cond) /// ThenBlock +/// SplitBefore /// Tail /// /// If Unreachable is true, then ThenBlock ends with /// UnreachableInst, otherwise it branches to Tail. /// Returns the NewBasicBlock's terminator.
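/// A hypothetical call site (editorial sketch, not from this commit; Cond,
/// InsertPt, and DT stand for caller-supplied values) against the post-patch
/// signature shown below:
///
///   TerminatorInst *Then = SplitBlockAndInsertIfThen(
///       Cond, InsertPt, /*Unreachable=*/false, /*BranchWeights=*/nullptr, DT);
///   IRBuilder<> Builder(Then); // emit the 'then' body ahead of this terminator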
-TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, - bool Unreachable, MDNode *BranchWeights) { - Instruction *SplitBefore = Cmp->getNextNode(); +TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond, + Instruction *SplitBefore, + bool Unreachable, + MDNode *BranchWeights, + DominatorTree *DT) { BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); TerminatorInst *HeadOldTerm = Head->getTerminator(); @@ -662,13 +685,65 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, CheckTerm = new UnreachableInst(C, ThenBlock); else CheckTerm = BranchInst::Create(Tail, ThenBlock); + CheckTerm->setDebugLoc(SplitBefore->getDebugLoc()); BranchInst *HeadNewTerm = - BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond); + HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc()); HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); + + if (DT) { + if (DomTreeNode *OldNode = DT->getNode(Head)) { + std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(Tail, Head); + for (auto Child : Children) + DT->changeImmediateDominator(Child, NewNode); + + // Head dominates ThenBlock. + DT->addNewBlock(ThenBlock, Head); + } + } + return CheckTerm; } +/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, +/// but also creates the ElseBlock. +/// Before: +/// Head +/// SplitBefore +/// Tail +/// After: +/// Head +/// if (Cond) +/// ThenBlock +/// else +/// ElseBlock +/// SplitBefore +/// Tail +void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, + TerminatorInst **ThenTerm, + TerminatorInst **ElseTerm, + MDNode *BranchWeights) { + BasicBlock *Head = SplitBefore->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + TerminatorInst *HeadOldTerm = Head->getTerminator(); + LLVMContext &C = Head->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + *ThenTerm = BranchInst::Create(Tail, ThenBlock); + (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc()); + *ElseTerm = BranchInst::Create(Tail, ElseBlock); + (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc()); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond); + HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc()); + HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); +} + + /// GetIfCondition - Given a basic block (BB) with two predecessors, /// check to see if the merge at this block is due /// to an "if condition". 
If so, return the boolean condition that determines @@ -681,32 +756,32 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse) { PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); - BasicBlock *Pred1 = NULL; - BasicBlock *Pred2 = NULL; + BasicBlock *Pred1 = nullptr; + BasicBlock *Pred2 = nullptr; if (SomePHI) { if (SomePHI->getNumIncomingValues() != 2) - return NULL; + return nullptr; Pred1 = SomePHI->getIncomingBlock(0); Pred2 = SomePHI->getIncomingBlock(1); } else { pred_iterator PI = pred_begin(BB), PE = pred_end(BB); if (PI == PE) // No predecessor - return NULL; + return nullptr; Pred1 = *PI++; if (PI == PE) // Only one predecessor - return NULL; + return nullptr; Pred2 = *PI++; if (PI != PE) // More than two predecessors - return NULL; + return nullptr; } // We can only handle branches. Other control flow will be lowered to // branches if possible anyway. BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); - if (Pred1Br == 0 || Pred2Br == 0) - return 0; + if (!Pred1Br || !Pred2Br) + return nullptr; // Eliminate code duplication by ensuring that Pred1Br is conditional if // either are. @@ -716,7 +791,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // required anyway, we stand no chance of eliminating it, so the xform is // probably not profitable. if (Pred1Br->isConditional()) - return 0; + return nullptr; std::swap(Pred1, Pred2); std::swap(Pred1Br, Pred2Br); @@ -726,8 +801,8 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // The only thing we have to watch out for here is to make sure that Pred2 // doesn't have incoming edges from other blocks. If it does, the condition // doesn't dominate BB. - if (Pred2->getSinglePredecessor() == 0) - return 0; + if (!Pred2->getSinglePredecessor()) + return nullptr; // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". @@ -742,7 +817,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, } else { // We know that one arm of the conditional goes to BB, so the other must // go somewhere unrelated, and this must not be an "if statement". - return 0; + return nullptr; } return Pred1Br->getCondition(); @@ -752,12 +827,12 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // BB. Don't panic! If both blocks only have a single (identical) // predecessor, and THAT is a conditional branch, then we're all ok! BasicBlock *CommonPred = Pred1->getSinglePredecessor(); - if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) - return 0; + if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor()) + return nullptr; // Otherwise, if this is a conditional branch, then we can use it! 
BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); - if (BI == 0) return 0; + if (!BI) return nullptr; assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 0e7f7f7..80bd516 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -15,21 +15,22 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "break-crit-edges" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +#define DEBUG_TYPE "break-crit-edges" + STATISTIC(NumBroken, "Number of blocks inserted"); namespace { @@ -39,10 +40,10 @@ namespace { initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<DominatorTree>(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<LoopInfo>(); // No loop canonicalization guarantees are broken by this pass. @@ -141,7 +142,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges, bool DontDeleteUselessPhis, bool SplitLandingPads) { - if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; + if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); @@ -151,7 +152,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. - if (DestBB->isLandingPad()) return 0; + if (DestBB->isLandingPad()) return nullptr; // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), @@ -207,13 +208,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If we don't have a pass object, we can't update anything... - if (P == 0) return NewBB; + if (!P) return NewBB; - DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + DominatorTreeWrapperPass *DTWP = + P->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); // If we have nothing to update, just return. - if (DT == 0 && LI == 0) + if (!DT && !LI) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is @@ -249,7 +252,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); - DomTreeNode *DestBBNode = 0; + DomTreeNode *DestBBNode = nullptr; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. 
if (!OtherPreds.empty()) { @@ -297,9 +300,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, P->addBasicBlockToLoop(NewBB, LI->getBase()); } } - // If TIBB is in a loop and DestBB is outside of that loop, split the - // other exit blocks of the loop that also have predecessors outside - // the loop, to maintain a LoopSimplify guarantee. + // If TIBB is in a loop and DestBB is outside of that loop, we may need + // to update LoopSimplify form and LCSSA form. if (!TIL->contains(DestBB) && P->mustPreserveAnalysisID(LoopSimplifyID)) { assert(!TIL->contains(NewBB) && @@ -309,50 +311,35 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, if (P->mustPreserveAnalysisID(LCSSAID)) createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); - // For each unique exit block... - // FIXME: This code is functionally equivalent to the corresponding - // loop in LoopSimplify. - SmallVector<BasicBlock *, 4> ExitBlocks; - TIL->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - // Collect all the preds that are inside the loop, and note - // whether there are any preds outside the loop. - SmallVector<BasicBlock *, 4> Preds; - bool HasPredOutsideOfLoop = false; - BasicBlock *Exit = ExitBlocks[i]; - for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); - I != E; ++I) { - BasicBlock *P = *I; - if (TIL->contains(P)) { - if (isa<IndirectBrInst>(P->getTerminator())) { - Preds.clear(); - break; - } - Preds.push_back(P); - } else { - HasPredOutsideOfLoop = true; - } - } - // If there are any preds not in the loop, we'll need to split - // the edges. The Preds.empty() check is needed because a block - // may appear multiple times in the list. We can't use - // getUniqueExitBlocks above because that depends on LoopSimplify - // form, which we're in the process of restoring! - if (!Preds.empty() && HasPredOutsideOfLoop) { - if (!Exit->isLandingPad()) { - BasicBlock *NewExitBB = - SplitBlockPredecessors(Exit, Preds, "split", P); - if (P->mustPreserveAnalysisID(LCSSAID)) - createPHIsForSplitLoopExit(Preds, NewExitBB, Exit); - } else if (SplitLandingPads) { - SmallVector<BasicBlock*, 8> NewBBs; - SplitLandingPadPredecessors(Exit, Preds, - ".split1", ".split2", - P, NewBBs); - if (P->mustPreserveAnalysisID(LCSSAID)) - createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit); - } + // The only way that we can break LoopSimplify form by splitting a critical + // edge is if after the split there exists some edge from TIL to DestBB + // *and* the only edge into DestBB from outside of TIL is that of + // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB + // is the new exit block and it has no non-loop predecessors. If the + // second isn't true, then DestBB was not in LoopSimplify form prior to + // the split as it had a non-loop predecessor. In both of these cases, + // the predecessor must be directly in TIL, not in a subloop, or again + // LoopSimplify doesn't hold. + SmallVector<BasicBlock *, 4> LoopPreds; + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; + ++I) { + BasicBlock *P = *I; + if (P == NewBB) + continue; // The new block is known. + if (LI->getLoopFor(P) != TIL) { + // No need to re-simplify, it wasn't to start with.
+ LoopPreds.clear(); + break; } + LoopPreds.push_back(P); + } + if (!LoopPreds.empty()) { + assert(!DestBB->isLandingPad() && + "We don't split edges to landing pads!"); + BasicBlock *NewExitBB = + SplitBlockPredecessors(DestBB, LoopPreds, "split", P); + if (P->mustPreserveAnalysisID(LCSSAID)) + createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); } } // LCSSA form was updated above for the case where LoopSimplify is diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 6d13217..be00b695 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -27,7 +27,8 @@ using namespace llvm; /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { - return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr"); + unsigned AS = V->getType()->getPointerAddressSpace(); + return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -35,7 +36,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strlen)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -64,7 +65,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strnlen)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -94,7 +95,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strchr)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; @@ -120,7 +121,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strncmp)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -153,7 +154,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strcpy)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -177,7 +178,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strncpy)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -204,7 +205,7 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcpy_chk)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS; @@ -232,7 +233,7 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memchr)) - 
return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS; @@ -260,7 +261,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcmp)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -286,6 +287,21 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, return CI; } +/// Append a suffix to the function name according to the type of 'Op'. +static void AppendTypeSuffix(Value *Op, StringRef &Name, SmallString<20> &NameBuffer) { + if (!Op->getType()->isDoubleTy()) { + NameBuffer += Name; + + if (Op->getType()->isFloatTy()) + NameBuffer += 'f'; + else + NameBuffer += 'l'; + + Name = NameBuffer; + } + return; +} + /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. /// 'floor'). This function is known to take a single argument of type matching /// 'Op' and return one value with the same type. If 'Op' is a long double, 'l' is @@ -293,15 +309,7 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, const AttributeSet &Attrs) { SmallString<20> NameBuffer; - if (!Op->getType()->isDoubleTy()) { - // If we need to add a suffix, copy into NameBuffer. - NameBuffer += Name; - if (Op->getType()->isFloatTy()) - NameBuffer += 'f'; // floorf - else - NameBuffer += 'l'; // floorl - Name = NameBuffer; - } + AppendTypeSuffix(Op, Name, NameBuffer); Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), @@ -314,12 +322,33 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, return CI; } +/// EmitBinaryFloatFnCall - Emit a call to the binary function named 'Name' +/// (e.g. 'fmin'). This function is known to take arguments of type matching +/// 'Op1' and 'Op2' and return one value with the same type. If 'Op1/Op2' are +/// long double, 'l' is added as the suffix of the name; if 'Op1/Op2' is a +/// float, we add an 'f' suffix. +Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, + IRBuilder<> &B, const AttributeSet &Attrs) { + SmallString<20> NameBuffer; + AppendTypeSuffix(Op1, Name, NameBuffer); + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), + Op1->getType(), Op2->getType(), NULL); + CallInst *CI = B.CreateCall2(Callee, Op1, Op2, Name); + CI->setAttributes(Attrs); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer.
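/// (Illustrative call, not from this commit: EmitPutChar(B.getInt32('\n'), B, TD, TLI)
/// lowers to putchar(10) when the target's C library provides putchar.)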
Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::putchar)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), @@ -341,7 +370,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::puts)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -365,7 +394,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputc)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -398,7 +427,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputs)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -431,7 +460,7 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fwrite)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index 1f517d0..f2d5e07 100644 --- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "bypass-slow-division" #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Function.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE "bypass-slow-division" + namespace { struct DivOpInfo { bool SignedOp; @@ -53,11 +54,11 @@ namespace llvm { } static DivOpInfo getEmptyKey() { - return DivOpInfo(false, 0, 0); + return DivOpInfo(false, nullptr, nullptr); } static DivOpInfo getTombstoneKey() { - return DivOpInfo(true, 0, 0); + return DivOpInfo(true, nullptr, nullptr); } static unsigned getHashValue(const DivOpInfo &Val) { diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index d105f5e..5c8f20d 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -17,8 +17,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -26,7 +27,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" -#include "llvm/Support/CFG.h" +#include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" 
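(Editorial aside, not part of the commit: a minimal driver for the cloning entry points patched in the hunks below, with F standing for any function in the module:

  ValueToValueMapTy VMap;
  Function *NewF = CloneFunction(F, VMap, /*ModuleLevelChanges=*/true);

With ModuleLevelChanges set, the new CloneDebugInfoMetadata step added below also gives NewF its own DISubprogram entry in the compile unit's subprogram list.)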
@@ -88,26 +89,28 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(VMap.count(I) && "No mapping from source argument specified!"); #endif + // Copy all attributes other than those stored in the AttributeSet. We need + // to remap the parameter indices of the AttributeSet. + AttributeSet NewAttrs = NewFunc->getAttributes(); + NewFunc->copyAttributesFrom(OldFunc); + NewFunc->setAttributes(NewAttrs); + AttributeSet OldAttrs = OldFunc->getAttributes(); // Clone any argument attributes that are present in the VMap. - for (Function::const_arg_iterator I = OldFunc->arg_begin(), - E = OldFunc->arg_end(); - I != E; ++I) - if (Argument *Anew = dyn_cast<Argument>(VMap[I])) { + for (const Argument &OldArg : OldFunc->args()) + if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) { AttributeSet attrs = - OldAttrs.getParamAttributes(I->getArgNo() + 1); + OldAttrs.getParamAttributes(OldArg.getArgNo() + 1); if (attrs.getNumSlots() > 0) - Anew->addAttr(attrs); + NewArg->addAttr(attrs); } - NewFunc->setAttributes(NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), - AttributeSet::ReturnIndex, - OldAttrs.getRetAttributes())); - NewFunc->setAttributes(NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), - AttributeSet::FunctionIndex, - OldAttrs.getFnAttributes())); + NewFunc->setAttributes( + NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex, + OldAttrs.getRetAttributes()) + .addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex, + OldAttrs.getFnAttributes())); // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of @@ -151,6 +154,54 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, TypeMapper, Materializer); } +// Find the MDNode which corresponds to the DISubprogram data that described F. +static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) { + for (DISubprogram Subprogram : Finder.subprograms()) { + if (Subprogram.describes(F)) return Subprogram; + } + return nullptr; +} + +// Add an operand to an existing MDNode. The new operand will be added at the +// back of the operand list. +static void AddOperand(MDNode *Node, Value *Operand) { + SmallVector<Value*, 16> Operands; + for (unsigned i = 0; i < Node->getNumOperands(); i++) { + Operands.push_back(Node->getOperand(i)); + } + Operands.push_back(Operand); + MDNode *NewNode = MDNode::get(Node->getContext(), Operands); + Node->replaceAllUsesWith(NewNode); +} + +// Clone the module-level debug info associated with OldFunc. The cloned data +// will point to NewFunc instead. +static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap) { + DebugInfoFinder Finder; + Finder.processModule(*OldFunc->getParent()); + + const MDNode *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder); + if (!OldSubprogramMDNode) return; + + // Ensure that OldFunc appears in the map. + // (if it's already there it must point to NewFunc anyway) + VMap[OldFunc] = NewFunc; + DISubprogram NewSubprogram(MapValue(OldSubprogramMDNode, VMap)); + + for (DICompileUnit CU : Finder.compile_units()) { + DIArray Subprograms(CU.getSubprograms()); + + // If the compile unit's function list contains the old function, it should + // also contain the new one. 
+ for (unsigned i = 0; i < Subprograms.getNumElements(); i++) { + if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) { + AddOperand(Subprograms, NewSubprogram); + } + } + } +} + /// CloneFunction - Return a copy of the specified function, but without /// embedding the function into another module. Also, any references specified /// in the VMap are changed to refer to their mapped value instead of the @@ -188,6 +239,9 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, VMap[I] = DestI++; // Add mapping to VMap } + if (ModuleLevelChanges) + CloneDebugInfoMetadata(NewF, F, VMap); + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo); return NewF; @@ -205,17 +259,17 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - const DataLayout *TD; + const DataLayout *DL; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, const char *nameSuffix, ClonedCodeInfo *codeInfo, - const DataLayout *td) + const DataLayout *DL) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), - NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { + NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL) { } /// CloneBlock - The specified block is found to be reachable, clone it and @@ -272,7 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into // the basic block. - if (Value *V = SimplifyInstruction(NewInst, TD)) { + if (Value *V = SimplifyInstruction(NewInst, DL)) { // On the off-chance that this simplifies to an instruction in the old // function, map it back into the new function. if (Value *MappedV = VMap.lookup(V)) @@ -305,7 +359,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If the condition was a known constant in the callee... ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); // Or is a known constant in the caller... - if (Cond == 0) { + if (!Cond) { Value *V = VMap[BI->getCondition()]; Cond = dyn_cast_or_null<ConstantInt>(V); } @@ -321,7 +375,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { // If switching on a value known constant in the caller. ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); - if (Cond == 0) { // Or known constant after constant prop in the callee... + if (!Cond) { // Or known constant after constant prop in the callee... Value *V = VMap[SI->getCondition()]; Cond = dyn_cast_or_null<ConstantInt>(V); } @@ -368,7 +422,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - const DataLayout *TD, + const DataLayout *DL, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -379,7 +433,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - NameSuffix, CodeInfo, TD); + NameSuffix, CodeInfo, DL); // Clone the entry block, and anything recursively reachable from it. 
std::vector<const BasicBlock*> CloneWorklist; @@ -400,7 +454,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, BI != BE; ++BI) { Value *V = VMap[BI]; BasicBlock *NewBB = cast_or_null<BasicBlock>(V); - if (NewBB == 0) continue; // Dead block. + if (!NewBB) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); @@ -509,7 +563,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // node). for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) - recursivelySimplifyInstruction(PN, TD); + recursivelySimplifyInstruction(PN, DL); // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp index 64df089..3f75b3e 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -47,8 +47,8 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { GlobalVariable *GV = new GlobalVariable(*New, I->getType()->getElementType(), I->isConstant(), I->getLinkage(), - (Constant*) 0, I->getName(), - (GlobalVariable*) 0, + (Constant*) nullptr, I->getName(), + (GlobalVariable*) nullptr, I->getThreadLocalMode(), I->getType()->getAddressSpace()); GV->copyAttributesFrom(I); @@ -67,8 +67,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(), - I->getName(), NULL, New); + auto *PTy = cast<PointerType>(I->getType()); + auto *GA = + GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + I->getLinkage(), I->getName(), New); GA->copyAttributesFrom(I); VMap[I] = GA; } @@ -106,7 +108,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { I != E; ++I) { GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); if (const Constant *C = I->getAliasee()) - GA->setAliasee(MapValue(C, VMap)); + GA->setAliasee(cast<GlobalObject>(MapValue(C, VMap))); } // And named metadata.... diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp index 8fa412a..3b15a0a 100644 --- a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -84,7 +84,7 @@ Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, case 7: // True.
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); } - return NULL; + return nullptr; } /// PredicatesFoldable - Return true if both predicates match sign or if at diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 6f008644..e70a7d6 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -14,20 +14,20 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" -#include "llvm/Analysis/Verifier.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -38,6 +38,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "code-extractor" + // Provide a command-line option to aggregate function arguments into a struct // for functions produced by the code extractor. This is useful when converting // extracted functions to pthread-based code, as only one argument (void*) can @@ -86,7 +88,7 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin, } #ifndef NDEBUG - for (SetVector<BasicBlock *>::iterator I = llvm::next(Result.begin()), + for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()), E = Result.end(); I != E; ++I) for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I); @@ -118,7 +120,7 @@ buildExtractionBlockSet(const RegionNode &RN) { } CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs) - : DT(0), AggregateArgs(AggregateArgs||AggregateArgsOpt), + : DT(nullptr), AggregateArgs(AggregateArgs||AggregateArgsOpt), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, @@ -171,9 +173,8 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, if (definedInCaller(Blocks, *OI)) Inputs.insert(*OI); - for (Value::use_iterator UI = II->use_begin(), UE = II->use_end(); - UI != UE; ++UI) - if (!definedInRegion(Blocks, *UI)) { + for (User *U : II->users()) + if (!definedInRegion(Blocks, U)) { Outputs.insert(II); break; } @@ -369,7 +370,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, } else RewriteVal = AI++; - std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end()); + std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end(); use != useE; ++use) if (Instruction* inst = dyn_cast<Instruction>(*use)) @@ -389,7 +390,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // Rewrite branches to basic blocks outside of the loop to new dummy blocks // within the new function. This must be done before we lose track of which // blocks were originally in the code region. 
- std::vector<User*> Users(header->use_begin(), header->use_end()); + std::vector<User*> Users(header->user_begin(), header->user_end()); for (unsigned i = 0, e = Users.size(); i != e; ++i) // The BasicBlock which contains the branch is not in the region // modify the branch target to a new block @@ -405,14 +406,13 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, /// that uses the value within the basic block, and return the predecessor /// block associated with that use, or return 0 if none is found. static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { - for (Value::use_iterator UI = Used->use_begin(), - UE = Used->use_end(); UI != UE; ++UI) { - PHINode *P = dyn_cast<PHINode>(*UI); + for (Use &U : Used->uses()) { + PHINode *P = dyn_cast<PHINode>(U.getUser()); if (P && P->getParent() == BB) - return P->getIncomingBlock(UI); + return P->getIncomingBlock(U); } - - return 0; + + return nullptr; } /// emitCallAndSwitchStatement - This method sets up the caller side by adding @@ -440,14 +440,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, StructValues.push_back(*i); } else { AllocaInst *alloca = - new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc", + new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc", codeReplacer->getParent()->begin()->begin()); ReloadOutputs.push_back(alloca); params.push_back(alloca); } } - AllocaInst *Struct = 0; + AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { std::vector<Type*> ArgTypes; for (ValueSet::iterator v = StructValues.begin(), @@ -457,7 +457,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Allocate a struct at the beginning of this function Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); Struct = - new AllocaInst(StructArgTy, 0, "structArg", + new AllocaInst(StructArgTy, nullptr, "structArg", codeReplacer->getParent()->begin()->begin()); params.push_back(Struct); @@ -486,7 +486,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Reload the outputs passed in by reference for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = 0; + Value *Output = nullptr; if (AggregateArgs) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); @@ -502,7 +502,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); Reloads.push_back(load); codeReplacer->getInstList().push_back(load); - std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end()); + std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction *inst = cast<Instruction>(Users[u]); if (!Blocks.count(inst->getParent())) @@ -539,7 +539,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, newFunction); unsigned SuccNum = switchVal++; - Value *brVal = 0; + Value *brVal = nullptr; switch (NumExitBlocks) { case 0: case 1: break; // No value needed. 
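// Illustrative aside (not part of the patch): what the generated caller
// side is morally equivalent to when AggregateArgs is enabled and the
// region has two exit blocks; names below are hypothetical:
struct ExtractedArgs { int In0; int Out0; };       // one struct, ins + outs
static int extractedRegion(ExtractedArgs *A) {     // body = the region
  A->Out0 = A->In0 + 1;
  return A->In0 > 0 ? 0 : 1;                       // index of the exit taken
}
static int callerSide(int In0) {
  ExtractedArgs A = { In0, 0 };
  switch (extractedRegion(&A)) {                   // "TheSwitch" built above
  case 0:  return A.Out0;                          // first exit block
  default: return -A.Out0;                         // second exit block
  }
}
// End aside.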
@@ -635,7 +635,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Check if the function should return a value if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, 0, TheSwitch); // Return void + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); @@ -687,7 +687,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) - return 0; + return nullptr; ValueSet inputs, outputs; diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp new file mode 100644 index 0000000..a359424 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp @@ -0,0 +1,183 @@ +//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions that are used to process llvm.global_ctors. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ctor_utils" + +namespace llvm { + +namespace { +/// Given a specified llvm.global_ctors list, install the +/// specified array. +void installGlobalCtors(GlobalVariable *GCL, + const std::vector<Function *> &Ctors) { + // If we made a change, reassemble the initializer list. + Constant *CSVals[3]; + + StructType *StructTy = + cast<StructType>(GCL->getType()->getElementType()->getArrayElementType()); + + // Create the new init list. + std::vector<Constant *> CAList; + for (Function *F : Ctors) { + Type *Int32Ty = Type::getInt32Ty(GCL->getContext()); + if (F) { + CSVals[0] = ConstantInt::get(Int32Ty, 65535); + CSVals[1] = F; + } else { + CSVals[0] = ConstantInt::get(Int32Ty, 0x7fffffff); + CSVals[1] = Constant::getNullValue(StructTy->getElementType(1)); + } + // FIXME: Only allow the 3-field form in LLVM 4.0. + size_t NumElts = StructTy->getNumElements(); + if (NumElts > 2) + CSVals[2] = Constant::getNullValue(StructTy->getElementType(2)); + CAList.push_back( + ConstantStruct::get(StructTy, makeArrayRef(CSVals, NumElts))); + } + + // Create the array initializer. + Constant *CA = + ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList); + + // If we didn't change the number of elements, don't create a new GV. + if (CA->getType() == GCL->getInitializer()->getType()) { + GCL->setInitializer(CA); + return; + } + + // Create the new global and insert it next to the existing list. + GlobalVariable *NGV = + new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), + CA, "", GCL->getThreadLocalMode()); + GCL->getParent()->getGlobalList().insert(GCL, NGV); + NGV->takeName(GCL); + + // Nuke the old list, replacing any uses with the new one. 
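// Illustrative aside (not part of the patch): the entry type rebuilt by
// installGlobalCtors above, as a hypothetical builder. The 3-field form
// appends an i8* of associated data, kept null here per the FIXME:
static StructType *makeCtorEntryTy(LLVMContext &C) {
  Type *Int32Ty = Type::getInt32Ty(C);
  Type *CtorPtrTy =
      FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false)->getPointerTo();
  Type *DataPtrTy = Type::getInt8PtrTy(C);
  // { i32 priority, void ()* ctor, i8* data }
  Type *Fields[] = { Int32Ty, CtorPtrTy, DataPtrTy };
  return StructType::get(C, Fields);
}
// End aside.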
+ if (!GCL->use_empty()) { + Constant *V = NGV; + if (V->getType() != GCL->getType()) + V = ConstantExpr::getBitCast(V, GCL->getType()); + GCL->replaceAllUsesWith(V); + } + GCL->eraseFromParent(); +} + +/// Given a llvm.global_ctors list that we can understand, +/// return a list of the functions and null terminator as a vector. +std::vector<Function*> parseGlobalCtors(GlobalVariable *GV) { + if (GV->getInitializer()->isNullValue()) + return std::vector<Function *>(); + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + std::vector<Function *> Result; + Result.reserve(CA->getNumOperands()); + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + ConstantStruct *CS = cast<ConstantStruct>(*i); + Result.push_back(dyn_cast<Function>(CS->getOperand(1))); + } + return Result; +} + +/// Find the llvm.global_ctors list, verifying that all initializers have an +/// init priority of 65535. +GlobalVariable *findGlobalCtors(Module &M) { + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return nullptr; + + // Verify that the initializer is simple enough for us to handle. We are + // only allowed to optimize the initializer if it is unique. + if (!GV->hasUniqueInitializer()) + return nullptr; + + if (isa<ConstantAggregateZero>(GV->getInitializer())) + return GV; + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + if (isa<ConstantAggregateZero>(*i)) + continue; + ConstantStruct *CS = cast<ConstantStruct>(*i); + if (isa<ConstantPointerNull>(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa<Function>(CS->getOperand(1))) + return nullptr; + + // Init priority must be standard. + ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) + return nullptr; + } + + return GV; +} +} // namespace + +/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the +/// entries for which it returns true. Return true if anything changed. +bool optimizeGlobalCtorsList(Module &M, + function_ref<bool(Function *)> ShouldRemove) { + GlobalVariable *GlobalCtors = findGlobalCtors(M); + if (!GlobalCtors) + return false; + + std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors); + if (Ctors.empty()) + return false; + + bool MadeChange = false; + + // Loop over global ctors, optimizing them when we can. + for (unsigned i = 0; i != Ctors.size(); ++i) { + Function *F = Ctors[i]; + // Found a null terminator in the middle of the list, prune off the rest of + // the list. + if (!F) { + if (i != Ctors.size() - 1) { + Ctors.resize(i + 1); + MadeChange = true; + } + break; + } + DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); + + // We cannot simplify external ctor functions. + if (F->empty()) + continue; + + // If we can evaluate the ctor at compile time, do. 
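// Illustrative aside (not part of the patch): how a caller such as
// GlobalOpt can drive this entry point; the predicate is hypothetical:
static bool removeTrivialCtors(Module &M) {
  return optimizeGlobalCtorsList(M, [](Function *F) {
    // Drop ctors whose body is a lone 'ret void' -- running them is a no-op.
    return F->size() == 1 && F->getEntryBlock().size() == 1 &&
           isa<ReturnInst>(F->getEntryBlock().front());
  });
}
// End aside.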
+ if (ShouldRemove(F)) { + Ctors.erase(Ctors.begin() + i); + MadeChange = true; + --i; + continue; + } + } + + if (!MadeChange) + return false; + + installGlobalCtors(GlobalCtors, Ctors); + return true; +} + +} // End llvm namespace diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index 0723b35..9972b22 100644 --- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -8,12 +8,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; /// DemoteRegToStack - This function takes a virtual register computed by an @@ -25,23 +25,23 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Instruction *AllocaPoint) { if (I.use_empty()) { I.eraseFromParent(); - return 0; + return nullptr; } // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(I.getType(), 0, + Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", AllocaPoint); } else { Function *F = I.getParent()->getParent(); - Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", + Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", F->getEntryBlock().begin()); } // Change all of the users of the instruction to read from the stack slot. while (!I.use_empty()) { - Instruction *U = cast<Instruction>(I.use_back()); + Instruction *U = cast<Instruction>(I.user_back()); if (PHINode *PN = dyn_cast<PHINode>(U)) { // If this is a PHI node, we can't insert a load of the value before the // use. Instead insert the load in the predecessor block corresponding @@ -56,7 +56,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == &I) { Value *&V = Loads[PN->getIncomingBlock(i)]; - if (V == 0) { + if (!V) { // Insert the load into the predecessor block V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, PN->getIncomingBlock(i)->getTerminator()); @@ -110,17 +110,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { if (P->use_empty()) { P->eraseFromParent(); - return 0; + return nullptr; } // Create a stack slot to hold the value. 
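// Illustrative aside (not part of the patch): a reg2mem-style driver over
// these two entry points; hypothetical helper. Values are collected first
// because demotion rewrites the use lists being walked, and PHIs are left
// to DemotePHIToStack:
static void demoteFunction(Function &F) {
  std::vector<Instruction *> Work;
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    for (BasicBlock::iterator It = BB->begin(), IE = BB->end(); It != IE;
         ++It) {
      Instruction *I = It;
      if (!I->use_empty() && !isa<AllocaInst>(I) && !isa<PHINode>(I))
        Work.push_back(I);
    }
  for (unsigned i = 0, e = Work.size(); i != e; ++i)
    DemoteRegToStack(*Work[i]);   // returns the new slot, or null if unused
}
// End aside.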
AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(P->getType(), 0, + Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", AllocaPoint); } else { Function *F = P->getParent()->getParent(); - Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", + Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", F->getEntryBlock().begin()); } diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp index 1da226b..51ead40 100644 --- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "flattencfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -22,16 +21,19 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +#define DEBUG_TYPE "flattencfg" + namespace { class FlattenCFGOpt { AliasAnalysis *AA; /// \brief Use parallel-and or parallel-or to generate conditions for /// conditional branches. - bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P = nullptr); /// \brief If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. - bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr); /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical @@ -126,9 +128,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, if (PHI) return false; // For simplicity, avoid cases containing PHI nodes. - BasicBlock *LastCondBlock = NULL; - BasicBlock *FirstCondBlock = NULL; - BasicBlock *UnCondBlock = NULL; + BasicBlock *LastCondBlock = nullptr; + BasicBlock *FirstCondBlock = nullptr; + BasicBlock *UnCondBlock = nullptr; int Idx = -1; // Check predecessors of \param BB. 
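// Illustrative aside (not part of the patch): the shape FlattenParallelAndOr
// targets. Two single-use condition blocks collapse into one block whose
// branch tests the combined condition:
//   before:  bb1: br i1 %c1, label %bb2, label %else
//            bb2: br i1 %c2, label %then, label %else
//   after:   bb1: %c = and i1 %c1, %c2
//                 br i1 %c, label %then, label %else
// A hypothetical IRBuilder fragment producing the merged test:
static Value *mergeParallelAnd(IRBuilder<> &B, Value *C1, Value *C2) {
  return B.CreateAnd(C1, C2, "parallel.and");
}
// End aside.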
@@ -240,7 +242,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); CmpInst::Predicate Predicate = CI->getPredicate(); - // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq + // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) { CI->setPredicate(ICmpInst::getInversePredicate(Predicate)); BI->swapSuccessors(); diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp index 5f0a563..12057e4 100644 --- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -9,9 +9,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CallSite.h" #include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; @@ -35,9 +35,8 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) { if (isa<GlobalValue>(C)) return false; - for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; - ++UI) - if (const Constant *CU = dyn_cast<Constant>(*UI)) { + for (const User *U : C->users()) + if (const Constant *CU = dyn_cast<Constant>(U)) { if (!isSafeToDestroyConstant(CU)) return false; } else @@ -47,10 +46,9 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) { static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, SmallPtrSet<const PHINode *, 16> &PhiUsers) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; - ++UI) { - const User *U = *UI; - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + for (const Use &U : V->uses()) { + const User *UR = U.getUser(); + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) { GS.HasNonInstructionUser = true; // If the result of the constantexpr isn't pointer type, then we won't @@ -60,10 +58,10 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, if (analyzeGlobalAux(CE, GS, PhiUsers)) return true; - } else if (const Instruction *I = dyn_cast<Instruction>(U)) { + } else if (const Instruction *I = dyn_cast<Instruction>(UR)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); - if (GS.AccessingFunction == 0) + if (!GS.AccessingFunction) GS.AccessingFunction = F; else if (GS.AccessingFunction != F) GS.HasMultipleAccessingFunctions = true; @@ -150,13 +148,13 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, return true; GS.StoredType = GlobalStatus::Stored; } else if (ImmutableCallSite C = I) { - if (!C.isCallee(UI)) + if (!C.isCallee(&U)) return true; GS.IsLoaded = true; } else { return true; // Any other non-load instruction might take address! } - } else if (const Constant *C = dyn_cast<Constant>(U)) { + } else if (const Constant *C = dyn_cast<Constant>(UR)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. 
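// Illustrative aside (not part of the patch): the iteration idiom this
// patch adopts throughout. Value::users() yields the consuming Users,
// while Value::uses() yields the Use edges -- which is what analyzeGlobalAux
// needs in order to ask C.isCallee(&U):
static void walkUses(Value *V) {
  for (User *U : V->users())
    (void)U;                        // just the consumer
  for (Use &U : V->uses()) {
    User *UR = U.getUser();         // consumer plus which operand slot
    (void)UR;
    (void)U.getOperandNo();
  }
}
// End aside.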
if (!isSafeToDestroyConstant(C)) @@ -178,6 +176,6 @@ bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { GlobalStatus::GlobalStatus() : IsCompared(false), IsLoaded(false), StoredType(NotStored), - StoredOnceValue(0), AccessingFunction(0), + StoredOnceValue(nullptr), AccessingFunction(nullptr), HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), Ordering(NotAtomic) {} diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index d021bce..f0a9f2b 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -17,17 +17,18 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -51,8 +52,8 @@ namespace { public: InvokeInliningInfo(InvokeInst *II) - : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0), - CallerLPad(0), InnerEHValuesPHI(0) { + : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), + CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { // If there are PHI nodes in the unwind destination block, we need to keep // track of which values came into them from the invoke before removing // the edge from this block. @@ -144,7 +145,6 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { void InvokeInliningInfo::forwardResume(ResumeInst *RI, SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); - LandingPadInst *OuterLPad = getLandingPadInst(); BasicBlock *Src = RI->getParent(); BranchInst::Create(Dest, Src); @@ -155,16 +155,6 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); RI->eraseFromParent(); - - // Append the clauses from the outer landing pad instruction into the inlined - // landing pad instructions. - for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), - E = InlinedLPads.end(); I != E; ++I) { - LandingPadInst *InlinedLPad = *I; - for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses(); - OuterIdx != OuterNum; ++OuterIdx) - InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); - } } /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into @@ -172,22 +162,11 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, /// invokes. This function analyze BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. -/// -/// Returns true to indicate that the next block should be skipped. 
-static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, +static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInliningInfo &Invoke) { - LandingPadInst *LPI = Invoke.getLandingPadInst(); - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *I = BBI++; - if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) { - unsigned NumClauses = LPI->getNumClauses(); - L->reserveClauses(NumClauses); - for (unsigned i = 0; i != NumClauses; ++i) - L->addClause(LPI->getClause(i)); - } - // We only need to check for function calls: inlined invoke // instructions require no special handling. CallInst *CI = dyn_cast<CallInst>(I); @@ -210,6 +189,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, Invoke.getOuterResumeDest(), InvokeArgs, CI->getName(), BB); + II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -223,10 +203,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Update any PHI nodes in the exceptional block to indicate that there is // now a new entry in them. Invoke.addIncomingPHIValuesFor(BB); - return false; + return; } - - return false; } /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls @@ -252,13 +230,23 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) InlinedLPads.insert(II->getLandingPadInst()); + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); + for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), + E = InlinedLPads.end(); I != E; ++I) { + LandingPadInst *InlinedLPad = *I; + unsigned OuterNum = OuterLPad->getNumClauses(); + InlinedLPad->reserveClauses(OuterNum); + for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + if (OuterLPad->isCleanup()) + InlinedLPad->setCleanup(true); + } + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) - if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { - // Honor a request to skip the next block. - ++BB; - continue; - } + HandleCallsInBlockInlinedThroughInvoke(BB, Invoke); // Forward any resumes that are remaining here. if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) @@ -303,13 +291,13 @@ static void UpdateCallGraphAfterInlining(CallSite CS, ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); // Only copy the edge if the call was inlined! - if (VMI == VMap.end() || VMI->second == 0) + if (VMI == VMap.end() || VMI->second == nullptr) continue; // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. Instruction *NewCall = dyn_cast<Instruction>(VMI->second); - if (NewCall == 0) continue; + if (!NewCall) continue; // Remember that this call site got inlined for the client of // InlineFunction. @@ -320,7 +308,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // happens, set the callee of the new call site to a more precise // destination. This can also happen if the call graph node of the caller // was just unnecessarily imprecise. 
- if (I->second->getFunction() == 0) + if (!I->second->getFunction()) if (Function *F = CallSite(NewCall).getCalledFunction()) { // Indirect call site resolved to direct call. CallerNode->addCalledFunction(CallSite(NewCall), CG[F]); @@ -336,13 +324,44 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->removeCallEdgeFor(CS); } +static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, + BasicBlock *InsertBlock, + InlineFunctionInfo &IFI) { + LLVMContext &Context = Src->getContext(); + Type *VoidPtrTy = Type::getInt8PtrTy(Context); + Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); + Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) }; + Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys); + IRBuilder<> builder(InsertBlock->begin()); + Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp"); + Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp"); + + Value *Size; + if (IFI.DL == nullptr) + Size = ConstantExpr::getSizeOf(AggTy); + else + Size = ConstantInt::get(Type::getInt64Ty(Context), + IFI.DL->getTypeStoreSize(AggTy)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. + Value *CallArgs[] = { + DstCast, SrcCast, Size, + ConstantInt::get(Type::getInt32Ty(Context), 1), + ConstantInt::getFalse(Context) // isVolatile + }; + builder.CreateCall(MemCpyFn, CallArgs); +} + /// HandleByValArgument - When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); + PointerType *ArgTy = cast<PointerType>(Arg->getType()); + Type *AggTy = ArgTy->getElementType(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and @@ -357,21 +376,17 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. if (getOrEnforceKnownAlignment(Arg, ByValAlignment, - IFI.TD) >= ByValAlignment) + IFI.DL) >= ByValAlignment) return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad // for code quality, but rarely happens and is required for correctness. } - - LLVMContext &Context = Arg->getContext(); - Type *VoidPtrTy = Type::getInt8PtrTy(Context); - // Create the alloca. If we have DataLayout, use nice alignment. unsigned Align = 1; - if (IFI.TD) - Align = IFI.TD->getPrefTypeAlignment(AggTy); + if (IFI.DL) + Align = IFI.DL->getPrefTypeAlignment(AggTy); // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the @@ -380,32 +395,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, Function *Caller = TheCall->getParent()->getParent(); - Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), + Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(), &*Caller->begin()->begin()); - // Emit a memcpy. 
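// Illustrative aside (not part of the patch): the initialization emitted
// by HandleByValArgumentInit above, shown as the IR it produces:
//   %dst = bitcast <ty>* %arg.alloca to i8*
//   %src = bitcast <ty>* %byval.actual to i8*
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src,
//                                        i64 <store size>, i32 1, i1 false)
// Alignment 1 is deliberate: the source pointer's alignment is unknown,
// and later passes can still infer a better one.
// End aside.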
- Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; - Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), - Intrinsic::memcpy, - Tys); - Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); - Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); - - Value *Size; - if (IFI.TD == 0) - Size = ConstantExpr::getSizeOf(AggTy); - else - Size = ConstantInt::get(Type::getInt64Ty(Context), - IFI.TD->getTypeStoreSize(AggTy)); - - // Always generate a memcpy of alignment 1 here because we don't know - // the alignment of the src pointer. Other optimizations can infer - // better alignment. - Value *CallArgs[] = { - DestCast, SrcCast, Size, - ConstantInt::get(Type::getInt32Ty(Context), 1), - ConstantInt::getFalse(Context) // isVolatile - }; - IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs); + IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); // Uses of the argument in the function should use our new alloca // instead. @@ -415,9 +407,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // isUsedByLifetimeMarker - Check whether this Value is used by a lifetime // intrinsic. static bool isUsedByLifetimeMarker(Value *V) { - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; - ++UI) { - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) { + for (User *U : V->users()) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::lifetime_start: @@ -432,16 +423,17 @@ static bool isUsedByLifetimeMarker(Value *V) { // hasLifetimeMarkers - Check whether the given alloca already has // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { - Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext()); - if (AI->getType() == Int8PtrTy) + Type *Ty = AI->getType(); + Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), + Ty->getPointerAddressSpace()); + if (Ty == Int8PtrTy) return isUsedByLifetimeMarker(AI); // Do a scan to find all the casts to i8*. - for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E; - ++I) { - if (I->getType() != Int8PtrTy) continue; - if (I->stripPointerCasts() != AI) continue; - if (isUsedByLifetimeMarker(*I)) + for (User *U : AI->users()) { + if (U->getType() != Int8PtrTy) continue; + if (U->stripPointerCasts() != AI) continue; + if (isUsedByLifetimeMarker(U)) return true; } return false; @@ -475,7 +467,13 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { DebugLoc DL = BI->getDebugLoc(); - if (!DL.isUnknown()) { + if (DL.isUnknown()) { + // If the inlined instruction has no line number, make it look as if it + // originates from the call location. This is important for + // ((__always_inline__, __nodebug__)) functions which must use caller + // location for all instructions in their function body. + BI->setDebugLoc(TheCallDL); + } else { BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { LLVMContext &Ctx = BI->getContext(); @@ -488,6 +486,33 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } +/// Returns a musttail call instruction if one immediately precedes the given +/// return instruction with an optional bitcast instruction between them. 
+static CallInst *getPrecedingMustTailCall(ReturnInst *RI) { + Instruction *Prev = RI->getPrevNode(); + if (!Prev) + return nullptr; + + if (Value *RV = RI->getReturnValue()) { + if (RV != Prev) + return nullptr; + + // Look through the optional bitcast. + if (auto *BI = dyn_cast<BitCastInst>(Prev)) { + RV = BI->getOperand(0); + Prev = BI->getPrevNode(); + if (!Prev || RV != Prev) + return nullptr; + } + } + + if (auto *CI = dyn_cast<CallInst>(Prev)) { + if (CI->isMustTailCall()) + return CI; + } + return nullptr; +} + /// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs @@ -507,15 +532,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); - if (CalledFunc == 0 || // Can't inline external function or indirect + if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; - // If the call to the callee is not a tail call, we must clear the 'tail' - // flags on any calls that we inline. - bool MustClearTailCallFlags = - !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); - // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); @@ -535,7 +555,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } // Get the personality function from the callee if it contains a landing pad. - Value *CalleePersonality = 0; + Value *CalleePersonality = nullptr; for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { @@ -578,6 +598,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, { // Scope to destroy VMap after cloning. ValueToValueMapTy VMap; + // Keep a list of pair (dst, src) to emit byval initializations. + SmallVector<std::pair<Value*, Value*>, 4> ByValInit; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -597,11 +619,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); - - // Calls that we inline may use the new alloca, so we need to clear - // their 'tail' flags if HandleByValArgument introduced a new alloca and - // the callee has calls. - MustClearTailCallFlags |= ActualArg != *AI; + if (ActualArg != *AI) + ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } VMap[I] = ActualArg; @@ -613,11 +632,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, IFI.TD, TheCall); + &InlinedFunctionInfo, IFI.DL, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; + // Inject byval arguments initialization. + for (std::pair<Value*, Value*> &Init : ByValInit) + HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), + FirstNewBlock, IFI); + // Update the callgraph if requested. 
if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); @@ -635,7 +659,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); - if (AI == 0) continue; + if (!AI) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. @@ -667,6 +691,45 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } + bool InlinedMustTailCalls = false; + if (InlinedFunctionInfo.ContainsCalls) { + CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; + if (CallInst *CI = dyn_cast<CallInst>(TheCall)) + CallSiteTailKind = CI->getTailCallKind(); + + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; + ++BB) { + for (Instruction &I : *BB) { + CallInst *CI = dyn_cast<CallInst>(&I); + if (!CI) + continue; + + // We need to reduce the strength of any inlined tail calls. For + // musttail, we have to avoid introducing potential unbounded stack + // growth. For example, if functions 'f' and 'g' are mutually recursive + // with musttail, we can inline 'g' into 'f' so long as we preserve + // musttail on the cloned call to 'f'. If either the inlined call site + // or the cloned call site is *not* musttail, the program already has + // one frame of stack growth, so it's safe to remove musttail. Here is + // a table of example transformations: + // + // f -> musttail g -> musttail f ==> f -> musttail f + // f -> musttail g -> tail f ==> f -> tail f + // f -> g -> musttail f ==> f -> f + // f -> g -> tail f ==> f -> f + CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); + ChildTCK = std::min(CallSiteTailKind, ChildTCK); + CI->setTailCallKind(ChildTCK); + InlinedMustTailCalls |= CI->isMustTailCall(); + + // Calls inlined through a 'nounwind' call site should be marked + // 'nounwind'. + if (MarkNoUnwind) + CI->setDoesNotThrow(); + } + } + } + // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. if (InsertLifetime && !IFI.StaticAllocas.empty()) { @@ -680,12 +743,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, continue; // Try to determine the size of the allocation. - ConstantInt *AllocaSize = 0; + ConstantInt *AllocaSize = nullptr; if (ConstantInt *AIArraySize = dyn_cast<ConstantInt>(AI->getArraySize())) { - if (IFI.TD) { + if (IFI.DL) { Type *AllocaType = AI->getAllocatedType(); - uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType); + uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); // Check that array size doesn't saturate uint64_t and doesn't @@ -699,9 +762,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } builder.CreateLifetimeStart(AI, AllocaSize); - for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { - IRBuilder<> builder(Returns[ri]); - builder.CreateLifetimeEnd(AI, AllocaSize); + for (ReturnInst *RI : Returns) { + // Don't insert llvm.lifetime.end calls between a musttail call and a + // return. The return kills all local allocas. 
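// Illustrative aside (not part of the patch): the tail-kind strength
// reduction above leans on the enum ordering TCK_None < TCK_Tail <
// TCK_MustTail, so taking the minimum keeps the weaker guarantee:
static CallInst::TailCallKind weakerKind(CallInst::TailCallKind SiteKind,
                                         CallInst::TailCallKind ChildKind) {
  return std::min(SiteKind, ChildKind);   // e.g. (tail, musttail) -> tail
}
// End aside.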
+ if (InlinedMustTailCalls && getPrecedingMustTailCall(RI)) + continue; + IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); } } } @@ -720,33 +786,56 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. - for (unsigned i = 0, e = Returns.size(); i != e; ++i) { - IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); + for (ReturnInst *RI : Returns) { + // Don't insert llvm.stackrestore calls between a musttail call and a + // return. The return will restore the stack pointer. + if (InlinedMustTailCalls && getPrecedingMustTailCall(RI)) + continue; + IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); } } - // If we are inlining tail call instruction through a call site that isn't - // marked 'tail', we must remove the tail marker for any calls in the inlined - // code. Also, calls inlined through a 'nounwind' call site should be marked - // 'nounwind'. - if (InlinedFunctionInfo.ContainsCalls && - (MustClearTailCallFlags || MarkNoUnwind)) { - for (Function::iterator BB = FirstNewBlock, E = Caller->end(); - BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (MustClearTailCallFlags) - CI->setTailCall(false); - if (MarkNoUnwind) - CI->setDoesNotThrow(); - } - } - // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); + // Handle any inlined musttail call sites. In order for a new call site to be + // musttail, the source of the clone and the inlined call site must have been + // musttail. Therefore it's safe to return without merging control into the + // phi below. + if (InlinedMustTailCalls) { + // Check if we need to bitcast the result of any musttail calls. + Type *NewRetTy = Caller->getReturnType(); + bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; + + // Handle the returns preceded by musttail calls separately. + SmallVector<ReturnInst *, 8> NormalReturns; + for (ReturnInst *RI : Returns) { + CallInst *ReturnedMustTail = getPrecedingMustTailCall(RI); + if (!ReturnedMustTail) { + NormalReturns.push_back(RI); + continue; + } + if (!NeedBitCast) + continue; + + // Delete the old return and any preceding bitcast. + BasicBlock *CurBB = RI->getParent(); + auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue()); + RI->eraseFromParent(); + if (OldCast) + OldCast->eraseFromParent(); + + // Insert a new bitcast and return with the right type. + IRBuilder<> Builder(CurBB); + Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); + } + + // Leave behind the normal returns so we can merge control flow. + std::swap(Returns, NormalReturns); + } + // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. @@ -790,7 +879,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. 
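// Illustrative aside (not part of the patch): the rewrite performed for
// musttail returns above. When the caller's return type differs from the
// call's type, the cloned 'ret' is rebuilt so the call stays in tail
// position:
//   %t = musttail call i8* @callee(...)
//   %c = bitcast i8* %t to i32*
//   ret i32* %c
// End aside.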
BasicBlock *AfterCallBB; - BranchInst *CreatedBranchToNormalDest = NULL; + BranchInst *CreatedBranchToNormalDest = nullptr; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... @@ -829,7 +918,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // any users of the original call/invoke instruction. Type *RTy = CalledFunc->getReturnType(); - PHINode *PHI = 0; + PHINode *PHI = nullptr; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. @@ -902,6 +991,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); + // If we inlined any musttail calls and the original return is now + // unreachable, delete it. It can only contain a bitcast and ret. + if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) + AfterCallBB->eraseFromParent(); + // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); @@ -922,7 +1016,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { - if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { + if (Value *V = SimplifyInstruction(PHI, IFI.DL)) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp index a020bc7..da890a2 100644 --- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -27,11 +27,11 @@ namespace { initializeInstNamerPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &Info) const { + void getAnalysisUsage(AnalysisUsage &Info) const override { Info.setPreservesAll(); } - bool runOnFunction(Function &F) { + bool runOnFunction(Function &F) override { for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) if (!AI->hasName() && !AI->getType()->isVoidTy()) diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp index 3cb8ded..9f91eeb 100644 --- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -7,22 +7,24 @@ // //===----------------------------------------------------------------------===// // -// This file contains an implementation of 32bit scalar integer division for -// targets that don't have native support. It's largely derived from -// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the -// amount of control flow +// This file contains an implementation of 32bit and 64bit scalar integer +// division for targets that don't have native support. It's largely derived +// from compiler-rt's implementations of __udivsi3 and __udivmoddi4, +// but hand-tuned for targets that prefer less control flow. 
// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "integer-division" #include "llvm/Transforms/Utils/IntegerDivision.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include <utility> using namespace llvm; +#define DEBUG_TYPE "integer-division" + /// Generate code to compute the remainder of two signed integers. Returns the /// remainder, which will have the sign of the dividend. Builder's insert point /// should be pointing where the caller wants code generated, e.g. at the srem @@ -31,7 +33,18 @@ using namespace llvm; /// be expanded if the user wishes static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { - ConstantInt *ThirtyOne = Builder.getInt32(31); + unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); + ConstantInt *Shift; + + if (BitWidth == 64) { + Shift = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Shift = Builder.getInt32(31); + } + + // Following instructions are generated for both i32 (shift 31) and + // i64 (shift 63). // ; %dividend_sgn = ashr i32 %dividend, 31 // ; %divisor_sgn = ashr i32 %divisor, 31 @@ -42,8 +55,8 @@ static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, // ; %urem = urem i32 %dividend, %divisor // ; %xored = xor i32 %urem, %dividend_sgn // ; %srem = sub i32 %xored, %dividend_sgn - Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); - Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DividendSign = Builder.CreateAShr(Dividend, Shift); + Value *DivisorSign = Builder.CreateAShr(Divisor, Shift); Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); @@ -68,6 +81,8 @@ static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // Remainder = Dividend - Quotient*Divisor + // Following instructions are generated for both i32 and i64 + // ; %quotient = udiv i32 %dividend, %divisor // ; %product = mul i32 %divisor, %quotient // ; %remainder = sub i32 %dividend, %product @@ -88,9 +103,20 @@ static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, /// present, i.e. not folded), ready to be expanded if the user wishes. static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { - // Implementation taken from compiler-rt's __divsi3 + // Implementation taken from compiler-rt's __divsi3 and __divdi3 + + unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); + ConstantInt *Shift; + + if (BitWidth == 64) { + Shift = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Shift = Builder.getInt32(31); + } - ConstantInt *ThirtyOne = Builder.getInt32(31); + // Following instructions are generated for both i32 (shift 31) and + // i64 (shift 63). 
// ; %tmp = ashr i32 %dividend, 31 // ; %tmp1 = ashr i32 %divisor, 31 @@ -102,8 +128,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr // ; %tmp4 = xor i32 %q_mag, %q_sgn // ; %q = sub i32 %tmp4, %q_sgn - Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne); - Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne); + Value *Tmp = Builder.CreateAShr(Dividend, Shift); + Value *Tmp1 = Builder.CreateAShr(Divisor, Shift); Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp); Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor); @@ -119,9 +145,9 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, return Q; } -/// Generates code to divide two unsigned scalar 32-bit integers. Returns the -/// quotient, rounded towards 0. Builder's insert point should be pointing where -/// the caller wants code generated, e.g. at the udiv instruction. +/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers. +/// Returns the quotient, rounded towards 0. Builder's insert point should +/// point where the caller wants code generated, e.g. at the udiv instruction. static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // The basic algorithm can be found in the compiler-rt project's @@ -129,18 +155,33 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // that's been hand-tuned to lessen the amount of control flow involved. // Some helper values - IntegerType *I32Ty = Builder.getInt32Ty(); + IntegerType *DivTy = cast<IntegerType>(Dividend->getType()); + unsigned BitWidth = DivTy->getBitWidth(); + + ConstantInt *Zero; + ConstantInt *One; + ConstantInt *NegOne; + ConstantInt *MSB; + + if (BitWidth == 64) { + Zero = Builder.getInt64(0); + One = Builder.getInt64(1); + NegOne = ConstantInt::getSigned(DivTy, -1); + MSB = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Zero = Builder.getInt32(0); + One = Builder.getInt32(1); + NegOne = ConstantInt::getSigned(DivTy, -1); + MSB = Builder.getInt32(31); + } - ConstantInt *Zero = Builder.getInt32(0); - ConstantInt *One = Builder.getInt32(1); - ConstantInt *ThirtyOne = Builder.getInt32(31); - ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1); - ConstantInt *True = Builder.getTrue(); + ConstantInt *True = Builder.getTrue(); BasicBlock *IBB = Builder.GetInsertBlock(); Function *F = IBB->getParent(); - Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, - I32Ty); + Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + DivTy); // Our CFG is going to look like: // +---------------------+ @@ -190,6 +231,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // We'll be overwriting the terminator to insert our extra blocks SpecialCases->getTerminator()->eraseFromParent(); + // Same instructions are generated for both i32 (msb 31) and i64 (msb 63). + // First off, check for special cases: dividend or divisor is zero, divisor // is greater than dividend, and divisor is 1. 
// ; special-cases: @@ -209,12 +252,12 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); - Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True); - Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True); + Value *Tmp0 = Builder.CreateCall2(CTLZ, Divisor, True); + Value *Tmp1 = Builder.CreateCall2(CTLZ, Dividend, True); Value *SR = Builder.CreateSub(Tmp0, Tmp1); - Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne); + Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB); Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); - Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne); + Value *RetDividend = Builder.CreateICmpEQ(SR, MSB); Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); Builder.CreateCondBr(EarlyRet, End, BB1); @@ -227,7 +270,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; br i1 %skipLoop, label %loop-exit, label %preheader Builder.SetInsertPoint(BB1); Value *SR_1 = Builder.CreateAdd(SR, One); - Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR); + Value *Tmp2 = Builder.CreateSub(MSB, SR); Value *Q = Builder.CreateShl(Dividend, Tmp2); Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero); Builder.CreateCondBr(SkipLoop, LoopExit, Preheader); @@ -260,17 +303,17 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; %tmp12 = icmp eq i32 %sr_2, 0 // ; br i1 %tmp12, label %loop-exit, label %do-while Builder.SetInsertPoint(DoWhile); - PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2); - PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2); - PHINode *R_1 = Builder.CreatePHI(I32Ty, 2); - PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2); + PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2); + PHINode *SR_3 = Builder.CreatePHI(DivTy, 2); + PHINode *R_1 = Builder.CreatePHI(DivTy, 2); + PHINode *Q_2 = Builder.CreatePHI(DivTy, 2); Value *Tmp5 = Builder.CreateShl(R_1, One); - Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne); + Value *Tmp6 = Builder.CreateLShr(Q_2, MSB); Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6); Value *Tmp8 = Builder.CreateShl(Q_2, One); Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8); Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7); - Value *Tmp10 = Builder.CreateAShr(Tmp9, 31); + Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB); Value *Carry = Builder.CreateAnd(Tmp10, One); Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor); Value *R = Builder.CreateSub(Tmp7, Tmp11); @@ -285,8 +328,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; %q_4 = or i32 %carry_2, %tmp13 // ; br label %end Builder.SetInsertPoint(LoopExit); - PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2); - PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2); + PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2); + PHINode *Q_3 = Builder.CreatePHI(DivTy, 2); Value *Tmp13 = Builder.CreateShl(Q_3, One); Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13); Builder.CreateBr(End); @@ -295,7 +338,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] // ; ret i32 %q_5 Builder.SetInsertPoint(End, End->begin()); - PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2); + PHINode *Q_5 = Builder.CreatePHI(DivTy, 2); // Populate the Phis, since all values have now been created. 
Our Phis were: // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] @@ -326,9 +369,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, /// Generate code to calculate the remainder of two integers, replacing Rem with /// the generated code. This currently generates code using the udiv expansion, /// but future work includes generating more specialized code, e.g. when more -/// information about the operands are known. Currently only implements 32bit -/// scalar division (due to udiv's limitation), but future work is removing this -/// limitation. +/// information about the operands are known. Implements both 32bit and 64bit +/// scalar division. /// /// @brief Replace Rem with generated code. bool llvm::expandRemainder(BinaryOperator *Rem) { @@ -338,6 +380,15 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { IRBuilder<> Builder(Rem); + Type *RemTy = Rem->getType(); + if (RemTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + if (RemTyBitWidth != 32 && RemTyBitWidth != 64) + llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + // First prepare the sign if it's a signed remainder if (Rem->getOpcode() == Instruction::SRem) { Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), @@ -376,9 +427,8 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { /// Generate code to divide two integers, replacing Div with the generated /// code. This currently generates code similarly to compiler-rt's /// implementations, but future work includes generating more specialized code -/// when more information about the operands are known. Currently only -/// implements 32bit scalar division, but future work is removing this -/// limitation. +/// when more information about the operands are known. Implements both +/// 32bit and 64bit scalar division. /// /// @brief Replace Div with generated code. bool llvm::expandDivision(BinaryOperator *Div) { @@ -388,9 +438,15 @@ bool llvm::expandDivision(BinaryOperator *Div) { IRBuilder<> Builder(Div); - if (Div->getType()->isVectorTy()) + Type *DivTy = Div->getType(); + if (DivTy->isVectorTy()) llvm_unreachable("Div over vectors not supported"); + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + if (DivTyBitWidth != 32 && DivTyBitWidth != 64) + llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + // First prepare the sign if it's a signed division if (Div->getOpcode() == Instruction::SDiv) { // Lower the code to unsigned division, and reset Div to point to the udiv. @@ -443,7 +499,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { if (RemTyBitWidth == 32) return expandRemainder(Rem); - // If bitwidth smaller than 32 extend inputs, truncate output and proceed + // If bitwidth smaller than 32 extend inputs, extend output and proceed // with 32 bit division. IRBuilder<> Builder(Rem); @@ -471,6 +527,55 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { return expandRemainder(cast<BinaryOperator>(ExtRem)); } +/// Generate code to compute the remainder of two integers of bitwidth up to +/// 64 bits. Uses the above routines and extends the inputs/truncates the +/// outputs to operate in 64 bits. +/// +/// @brief Replace Rem with emulation code. 
+bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + Type *RemTy = Rem->getType(); + if (RemTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + if (RemTyBitWidth > 64) + llvm_unreachable("Div of bitwidth greater than 64 not supported"); + + if (RemTyBitWidth == 64) + return expandRemainder(Rem); + + // If bitwidth smaller than 64 extend inputs, extend output and proceed + // with 64 bit division. + IRBuilder<> Builder(Rem); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtRem; + Value *Trunc; + Type *Int64Ty = Builder.getInt64Ty(); + + if (Rem->getOpcode() == Instruction::SRem) { + ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty); + ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty); + ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtRem, RemTy); + + Rem->replaceAllUsesWith(Trunc); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + return expandRemainder(cast<BinaryOperator>(ExtRem)); +} /// Generate code to divide two integers of bitwidth up to 32 bits. Uses the /// above routines and extends the inputs/truncates the outputs to operate @@ -495,7 +600,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { if (DivTyBitWidth == 32) return expandDivision(Div); - // If bitwidth smaller than 32 extend inputs, truncate output and proceed + // If bitwidth smaller than 32 extend inputs, extend output and proceed // with 32 bit division. IRBuilder<> Builder(Div); @@ -522,3 +627,53 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { return expandDivision(cast<BinaryOperator>(ExtDiv)); } + +/// Generate code to divide two integers of bitwidth up to 64 bits. Uses the +/// above routines and extends the inputs/truncates the outputs to operate +/// in 64 bits. +/// +/// @brief Replace Div with emulation code. +bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + Type *DivTy = Div->getType(); + if (DivTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + if (DivTyBitWidth > 64) + llvm_unreachable("Div of bitwidth greater than 64 not supported"); + + if (DivTyBitWidth == 64) + return expandDivision(Div); + + // If bitwidth smaller than 64 extend inputs, extend output and proceed + // with 64 bit division. 
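  // (Editorial aside, not part of the patch.) The extension choice just below
  // is load-bearing: sdiv must widen its operands with CreateSExt so negative
  // values keep their sign in 64 bits, while udiv widens with CreateZExt;
  // mixing these up would silently change the result for negative inputs.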
+ IRBuilder<> Builder(Div); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtDiv; + Value *Trunc; + Type *Int64Ty = Builder.getInt64Ty(); + + if (Div->getOpcode() == Instruction::SDiv) { + ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty); + ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtDiv, DivTy); + + Div->replaceAllUsesWith(Trunc); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return expandDivision(cast<BinaryOperator>(ExtDiv)); +} diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index 97e7e5d..51a3d9c 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -27,222 +27,108 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lcssa" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/PredIteratorCache.h" #include "llvm/Pass.h" -#include "llvm/Support/PredIteratorCache.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; -STATISTIC(NumLCSSA, "Number of live out of a loop variables"); - -namespace { - struct LCSSA : public LoopPass { - static char ID; // Pass identification, replacement for typeid - LCSSA() : LoopPass(ID) { - initializeLCSSAPass(*PassRegistry::getPassRegistry()); - } - - // Cached analysis information for the current function. - DominatorTree *DT; - LoopInfo *LI; - ScalarEvolution *SE; - PredIteratorCache PredCache; - Loop *L; - - virtual bool runOnLoop(Loop *L, LPPassManager &LPM); - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG. It maintains both of these, - /// as well as the CFG. It also requires dominator information. - /// - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - - AU.addRequired<DominatorTree>(); - AU.addRequired<LoopInfo>(); - AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<ScalarEvolution>(); - } - private: - bool ProcessInstruction(Instruction *Inst, - const SmallVectorImpl<BasicBlock*> &ExitBlocks); - - /// verifyAnalysis() - Verify loop nest. - virtual void verifyAnalysis() const { - // Check the special guarantees that LCSSA makes. 
- assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!"); - } - }; -} - -char LCSSA::ID = 0; -INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) - -Pass *llvm::createLCSSAPass() { return new LCSSA(); } -char &llvm::LCSSAID = LCSSA::ID; +#define DEBUG_TYPE "lcssa" +STATISTIC(NumLCSSA, "Number of live out of a loop variables"); -/// BlockDominatesAnExit - Return true if the specified block dominates at least -/// one of the blocks in the specified list. -static bool BlockDominatesAnExit(BasicBlock *BB, - const SmallVectorImpl<BasicBlock*> &ExitBlocks, - DominatorTree *DT) { - DomTreeNode *DomNode = DT->getNode(BB); +/// Return true if the specified block is in the list. +static bool isExitBlock(BasicBlock *BB, + const SmallVectorImpl<BasicBlock *> &ExitBlocks) { for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i]))) + if (ExitBlocks[i] == BB) return true; - return false; } +/// Given an instruction in the loop, check to see if it has any uses that are +/// outside the current loop. If so, insert LCSSA PHI nodes and rewrite the +/// uses. +static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, + const SmallVectorImpl<BasicBlock *> &ExitBlocks, + PredIteratorCache &PredCache) { + SmallVector<Use *, 16> UsesToRewrite; -/// runOnFunction - Process all loops in the function, inner-most out. -bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { - L = TheLoop; - - DT = &getAnalysis<DominatorTree>(); - LI = &getAnalysis<LoopInfo>(); - SE = getAnalysisIfAvailable<ScalarEvolution>(); - - // Get the set of exiting blocks. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - - if (ExitBlocks.empty()) - return false; - - // Look at all the instructions in the loop, checking to see if they have uses - // outside the loop. If so, rewrite those uses. - bool MadeChange = false; - - for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end(); - BBI != E; ++BBI) { - BasicBlock *BB = *BBI; - - // For large loops, avoid use-scanning by using dominance information: In - // particular, if a block does not dominate any of the loop exits, then none - // of the values defined in the block could be used outside the loop. - if (!BlockDominatesAnExit(BB, ExitBlocks, DT)) - continue; - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - // Reject two common cases fast: instructions with no uses (like stores) - // and instructions with one use that is in the same block as this. - if (I->use_empty() || - (I->hasOneUse() && I->use_back()->getParent() == BB && - !isa<PHINode>(I->use_back()))) - continue; - - MadeChange |= ProcessInstruction(I, ExitBlocks); - } - } - - // If we modified the code, remove any caches about the loop from SCEV to - // avoid dangling entries. - // FIXME: This is a big hammer, can we clear the cache more selectively? - if (SE && MadeChange) - SE->forgetLoop(L); - - assert(L->isLCSSAForm(*DT)); - PredCache.clear(); - - return MadeChange; -} + BasicBlock *InstBB = Inst.getParent(); -/// isExitBlock - Return true if the specified block is in the list. 
-static bool isExitBlock(BasicBlock *BB, - const SmallVectorImpl<BasicBlock*> &ExitBlocks) { - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (ExitBlocks[i] == BB) - return true; - return false; -} + for (Use &U : Inst.uses()) { + Instruction *User = cast<Instruction>(U.getUser()); + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(U); -/// ProcessInstruction - Given an instruction in the loop, check to see if it -/// has any uses that are outside the current loop. If so, insert LCSSA PHI -/// nodes and rewrite the uses. -bool LCSSA::ProcessInstruction(Instruction *Inst, - const SmallVectorImpl<BasicBlock*> &ExitBlocks) { - SmallVector<Use*, 16> UsesToRewrite; - - BasicBlock *InstBB = Inst->getParent(); - - for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); - UI != E; ++UI) { - User *U = *UI; - BasicBlock *UserBB = cast<Instruction>(U)->getParent(); - if (PHINode *PN = dyn_cast<PHINode>(U)) - UserBB = PN->getIncomingBlock(UI); - - if (InstBB != UserBB && !L->contains(UserBB)) - UsesToRewrite.push_back(&UI.getUse()); + if (InstBB != UserBB && !L.contains(UserBB)) + UsesToRewrite.push_back(&U); } // If there are no uses outside the loop, exit with no change. - if (UsesToRewrite.empty()) return false; - + if (UsesToRewrite.empty()) + return false; + ++NumLCSSA; // We are applying the transformation // Invoke instructions are special in that their result value is not available - // along their unwind edge. The code below tests to see whether DomBB dominates + // along their unwind edge. The code below tests to see whether DomBB + // dominates // the value, so adjust DomBB to the normal destination block, which is // effectively where the value is first usable. - BasicBlock *DomBB = Inst->getParent(); - if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst)) + BasicBlock *DomBB = Inst.getParent(); + if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst)) DomBB = Inv->getNormalDest(); - DomTreeNode *DomNode = DT->getNode(DomBB); + DomTreeNode *DomNode = DT.getNode(DomBB); - SmallVector<PHINode*, 16> AddedPHIs; + SmallVector<PHINode *, 16> AddedPHIs; SSAUpdater SSAUpdate; - SSAUpdate.Initialize(Inst->getType(), Inst->getName()); - + SSAUpdate.Initialize(Inst.getType(), Inst.getName()); + // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. - for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(), - BBE = ExitBlocks.end(); BBI != BBE; ++BBI) { + for (SmallVectorImpl<BasicBlock *>::const_iterator BBI = ExitBlocks.begin(), + BBE = ExitBlocks.end(); + BBI != BBE; ++BBI) { BasicBlock *ExitBB = *BBI; - if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue; - + if (!DT.dominates(DomNode, DT.getNode(ExitBB))) + continue; + // If we already inserted something for this BB, don't reprocess it. - if (SSAUpdate.HasValueForBlock(ExitBB)) continue; - - PHINode *PN = PHINode::Create(Inst->getType(), - PredCache.GetNumPreds(ExitBB), - Inst->getName()+".lcssa", - ExitBB->begin()); + if (SSAUpdate.HasValueForBlock(ExitBB)) + continue; + + PHINode *PN = PHINode::Create(Inst.getType(), PredCache.GetNumPreds(ExitBB), + Inst.getName() + ".lcssa", ExitBB->begin()); // Add inputs from inside the loop for this PHI. 
for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { - PN->addIncoming(Inst, *PI); + PN->addIncoming(&Inst, *PI); // If the exit block has a predecessor not within the loop, arrange for // the incoming value use corresponding to that predecessor to be // rewritten in terms of a different LCSSA PHI. - if (!L->contains(*PI)) + if (!L.contains(*PI)) UsesToRewrite.push_back( - &PN->getOperandUse( - PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); + &PN->getOperandUse(PN->getOperandNumForIncomingValue( + PN->getNumIncomingValues() - 1))); } AddedPHIs.push_back(PN); - + // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); } @@ -259,15 +145,14 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, if (PHINode *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(*UsesToRewrite[i]); - if (isa<PHINode>(UserBB->begin()) && - isExitBlock(UserBB, ExitBlocks)) { + if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { // Tell the VHs that the uses changed. This updates SCEV's caches. if (UsesToRewrite[i]->get()->hasValueHandle()) ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin()); UsesToRewrite[i]->set(UserBB->begin()); continue; } - + // Otherwise, do full PHI insertion. SSAUpdate.RewriteUse(*UsesToRewrite[i]); } @@ -277,7 +162,152 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, if (AddedPHIs[i]->use_empty()) AddedPHIs[i]->eraseFromParent(); } - + return true; } +/// Return true if the specified block dominates at least +/// one of the blocks in the specified list. +static bool +blockDominatesAnExit(BasicBlock *BB, + DominatorTree &DT, + const SmallVectorImpl<BasicBlock *> &ExitBlocks) { + DomTreeNode *DomNode = DT.getNode(BB); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (DT.dominates(DomNode, DT.getNode(ExitBlocks[i]))) + return true; + + return false; +} + +bool llvm::formLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) { + bool Changed = false; + + // Get the set of exiting blocks. + SmallVector<BasicBlock *, 8> ExitBlocks; + L.getExitBlocks(ExitBlocks); + + if (ExitBlocks.empty()) + return false; + + PredIteratorCache PredCache; + + // Look at all the instructions in the loop, checking to see if they have uses + // outside the loop. If so, rewrite those uses. + for (Loop::block_iterator BBI = L.block_begin(), BBE = L.block_end(); + BBI != BBE; ++BBI) { + BasicBlock *BB = *BBI; + + // For large loops, avoid use-scanning by using dominance information: In + // particular, if a block does not dominate any of the loop exits, then none + // of the values defined in the block could be used outside the loop. + if (!blockDominatesAnExit(BB, DT, ExitBlocks)) + continue; + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Reject two common cases fast: instructions with no uses (like stores) + // and instructions with one use that is in the same block as this. + if (I->use_empty() || + (I->hasOneUse() && I->user_back()->getParent() == BB && + !isa<PHINode>(I->user_back()))) + continue; + + Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache); + } + } + + // If we modified the code, remove any caches about the loop from SCEV to + // avoid dangling entries. + // FIXME: This is a big hammer, can we clear the cache more selectively? + if (SE && Changed) + SE->forgetLoop(&L); + + assert(L.isLCSSAForm(DT)); + + return Changed; +} + +/// Process a loop nest depth first. 
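Taken together, the helpers above implement a two-step recipe: plant one single-purpose PHI in each exit block dominated by the value, then let SSAUpdater route every outside use through whichever PHI reaches it. An editorial sketch of that pairing, not part of the patch (Inst, ExitBB and UsesToRewrite stand in for the real bookkeeping):

// Editorial sketch, not patch content: the PHI + SSAUpdater pairing behind
// formLCSSA. One PHI per dominated exit block; SSAUpdater rewires the uses.
SSAUpdater SSAUpdate;
SSAUpdate.Initialize(Inst.getType(), Inst.getName());

PHINode *ExitPhi = PHINode::Create(Inst.getType(), /*NumReservedValues=*/2,
                                   Inst.getName() + ".lcssa", ExitBB->begin());
for (pred_iterator PI = pred_begin(ExitBB), PE = pred_end(ExitBB); PI != PE;
     ++PI)
  ExitPhi->addIncoming(&Inst, *PI);  // the value arrives unchanged on each edge
SSAUpdate.AddAvailableValue(ExitBB, ExitPhi);

for (Use *U : UsesToRewrite)
  SSAUpdate.RewriteUse(*U);          // each use now reads a PHI, not Inst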
+bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, + ScalarEvolution *SE) { + bool Changed = false; + + // Recurse depth-first through inner loops. + for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) + Changed |= formLCSSARecursively(**LI, DT, SE); + + Changed |= formLCSSA(L, DT, SE); + return Changed; +} + +namespace { +struct LCSSA : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LCSSA() : FunctionPass(ID) { + initializeLCSSAPass(*PassRegistry::getPassRegistry()); + } + + // Cached analysis information for the current function. + DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; + + bool runOnFunction(Function &F) override; + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. It maintains both of these, + /// as well as the CFG. It also requires dominator information. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfo>(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<ScalarEvolution>(); + } + +private: + void verifyAnalysis() const override; +}; +} + +char LCSSA::ID = 0; +INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) + +Pass *llvm::createLCSSAPass() { return new LCSSA(); } +char &llvm::LCSSAID = LCSSA::ID; + + +/// Process all loops in the function, inner-most out. +bool LCSSA::runOnFunction(Function &F) { + bool Changed = false; + LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); + + // Simplify each loop nest in the function. + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + Changed |= formLCSSARecursively(**I, *DT, SE); + + return Changed; +} + +static void verifyLoop(Loop &L, DominatorTree &DT) { + // Recurse depth-first through inner loops. + for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) + verifyLoop(**LI, DT); + + // Check the special guarantees that LCSSA makes. + //assert(L.isLCSSAForm(DT) && "LCSSA form not preserved!"); +} + +void LCSSA::verifyAnalysis() const { + // Verify each loop nest in the function, assuming LI still points at that + // function's loop info. 
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + verifyLoop(**I, *DT); +} diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 2768041..a5e443f 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -17,15 +17,17 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DIBuilder.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" @@ -35,14 +37,14 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/CFG.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "local" + STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); //===----------------------------------------------------------------------===// @@ -127,8 +129,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // dest. If so, eliminate it as an explicit compare. if (i.getCaseSuccessor() == DefaultDest) { MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); - // MD should have 2 + NumCases operands. - if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) { + unsigned NCases = SI->getNumCases(); + // Fold the case metadata into the default if there will be any branches + // left, unless the metadata doesn't match the switch. + if (NCases > 1 && MD && MD->getNumOperands() == 2 + NCases) { // Collect branch weights into a vector. SmallVector<uint32_t, 8> Weights; for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; @@ -157,7 +161,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Otherwise, check to see if the switch only branches to one destination. // We do this by reseting "TheOnlyDest" to null when we find two non-equal // destinations. - if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0; + if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = nullptr; } if (CI && !TheOnlyDest) { @@ -178,7 +182,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Found case matching a constant operand? 
BasicBlock *Succ = SI->getSuccessor(i); if (Succ == TheOnlyDest) - TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest + TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest else Succ->removePredecessor(BB); } @@ -231,7 +235,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { if (IBI->getDestination(i) == TheOnlyDest) - TheOnlyDest = 0; + TheOnlyDest = nullptr; else IBI->getDestination(i)->removePredecessor(IBI->getParent()); } @@ -329,7 +333,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, // dead as we go. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Value *OpV = I->getOperand(i); - I->setOperand(i, 0); + I->setOperand(i, nullptr); if (!OpV->use_empty()) continue; @@ -352,8 +356,8 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, /// true when there are no uses or multiple uses that all refer to the same /// value. static bool areAllUsesEqual(Instruction *I) { - Value::use_iterator UI = I->use_begin(); - Value::use_iterator UE = I->use_end(); + Value::user_iterator UI = I->user_begin(); + Value::user_iterator UE = I->user_end(); if (UI == UE) return true; @@ -374,7 +378,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI) { SmallPtrSet<Instruction*, 4> Visited; for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); - I = cast<Instruction>(*I->use_begin())) { + I = cast<Instruction>(*I->user_begin())) { if (I->use_empty()) return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); @@ -505,12 +509,18 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { PredBB->getTerminator()->eraseFromParent(); DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + // If the PredBB is the entry block of the function, move DestBB up to + // become the entry block after we erase PredBB. + if (PredBB == &DestBB->getParent()->getEntryBlock()) + DestBB->moveAfter(PredBB); + if (P) { - DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); - if (DT) { - BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock(); - DT->changeImmediateDominator(DestBB, PredBBIDom); - DT->eraseNode(PredBB); + if (DominatorTreeWrapperPass *DTWP = + P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { + DominatorTree &DT = DTWP->getDomTree(); + BasicBlock *PredBBIDom = DT.getNode(PredBB)->getIDom()->getBlock(); + DT.changeImmediateDominator(DestBB, PredBBIDom); + DT.eraseNode(PredBB); } } // Nuke BB. @@ -749,10 +759,9 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { if (!Succ->getSinglePredecessor()) { BasicBlock::iterator BBI = BB->begin(); while (isa<PHINode>(*BBI)) { - for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); - UI != E; ++UI) { - if (PHINode* PN = dyn_cast<PHINode>(*UI)) { - if (PN->getIncomingBlock(UI) != BB) + for (Use &U : BBI->uses()) { + if (PHINode* PN = dyn_cast<PHINode>(U.getUser())) { + if (PN->getIncomingBlock(U) != BB) return false; } else { return false; @@ -892,24 +901,26 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, return PrefAlign; } - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (auto *GO = dyn_cast<GlobalObject>(V)) { // If there is a large requested alignment and we can, bump up the alignment // of the global. 
- if (GV->isDeclaration()) return Align; + if (GO->isDeclaration()) + return Align; // If the memory we set aside for the global may not be the memory used by // the final program then it is impossible for us to reliably enforce the // preferred alignment. - if (GV->isWeakForLinker()) return Align; + if (GO->isWeakForLinker()) + return Align; - if (GV->getAlignment() >= PrefAlign) - return GV->getAlignment(); + if (GO->getAlignment() >= PrefAlign) + return GO->getAlignment(); // We can only increase the alignment of the global if it has no alignment // specified or if it is not assigned a section. If it is assigned a // section, the global could be densely packed with other objects in the // section, increasing the alignment could cause padding issues. - if (!GV->hasSection() || GV->getAlignment() == 0) - GV->setAlignment(PrefAlign); - return GV->getAlignment(); + if (!GO->hasSection() || GO->getAlignment() == 0) + GO->setAlignment(PrefAlign); + return GO->getAlignment(); } return Align; @@ -926,7 +937,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64; APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, DL); + computeKnownBits(V, KnownZero, KnownOne, DL); unsigned TrailZ = KnownZero.countTrailingOnes(); // Avoid trouble with ridiculously large TrailZ values, such as @@ -979,10 +990,10 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (LdStHasDebugValue(DIVar, SI)) return true; - Instruction *DbgVal = NULL; + Instruction *DbgVal = nullptr; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. - Argument *ExtendedArg = NULL; + Argument *ExtendedArg = nullptr; if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) @@ -991,14 +1002,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI); else DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI); - - // Propagate any debug metadata from the store onto the dbg.value. - DebugLoc SIDL = SI->getDebugLoc(); - if (!SIDL.isUnknown()) - DbgVal->setDebugLoc(SIDL); - // Otherwise propagate debug metadata from dbg.declare. - else - DbgVal->setDebugLoc(DDI->getDebugLoc()); + DbgVal->setDebugLoc(DDI->getDebugLoc()); return true; } @@ -1018,51 +1022,54 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, Instruction *DbgVal = Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, LI); - - // Propagate any debug metadata from the store onto the dbg.value. - DebugLoc LIDL = LI->getDebugLoc(); - if (!LIDL.isUnknown()) - DbgVal->setDebugLoc(LIDL); - // Otherwise propagate debug metadata from dbg.declare. - else - DbgVal->setDebugLoc(DDI->getDebugLoc()); + DbgVal->setDebugLoc(DDI->getDebugLoc()); return true; } +/// Determine whether this alloca is either a VLA or an array. +static bool isArray(AllocaInst *AI) { + return AI->isArrayAllocation() || + AI->getType()->getElementType()->isArrayTy(); +} + /// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set /// of llvm.dbg.value intrinsics. 
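The per-store conversion the loop below dispatches to reduces to a few lines. This editorial sketch is not patch content (emitDbgValueForStore is a hypothetical name); it uses DIBuilder the same way ConvertDebugDeclareToDebugValue does earlier in this file:

// Editorial sketch, not patch content: replace a dbg.declare's coverage of
// one store with a dbg.value describing the stored value at that point.
static void emitDbgValueForStore(DbgDeclareInst *DDI, StoreInst *SI,
                                 DIBuilder &DIB) {
  DIVariable DIVar(DDI->getVariable());
  Instruction *DbgVal =
      DIB.insertDbgValueIntrinsic(SI->getOperand(0), /*Offset=*/0, DIVar, SI);
  DbgVal->setDebugLoc(DDI->getDebugLoc()); // keep the declare's source location
}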
bool llvm::LowerDbgDeclare(Function &F) { DIBuilder DIB(*F.getParent()); SmallVector<DbgDeclareInst *, 4> Dbgs; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + for (auto &FI : F) + for (BasicBlock::iterator BI : FI) + if (auto DDI = dyn_cast<DbgDeclareInst>(BI)) Dbgs.push_back(DDI); - } + if (Dbgs.empty()) return false; - for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(), - E = Dbgs.end(); I != E; ++I) { - DbgDeclareInst *DDI = *I; + for (auto &I : Dbgs) { + DbgDeclareInst *DDI = I; AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress()); // If this is an alloca for a scalar variable, insert a dbg.value // at each load and store to the alloca and erase the dbg.declare. - if (AI && !AI->isArrayAllocation()) { - - // We only remove the dbg.declare intrinsic if all uses are - // converted to dbg.value intrinsics. - bool RemoveDDI = true; - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E; ++UI) - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + // The dbg.values allow tracking a variable even if it is not + // stored on the stack, while the dbg.declare can only describe + // the stack slot (and at a lexical-scope granularity). Later + // passes will attempt to elide the stack slot. + if (AI && !isArray(AI)) { + for (User *U : AI->users()) + if (StoreInst *SI = dyn_cast<StoreInst>(U)) ConvertDebugDeclareToDebugValue(DDI, SI, DIB); - else if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) + else if (LoadInst *LI = dyn_cast<LoadInst>(U)) ConvertDebugDeclareToDebugValue(DDI, LI, DIB); - else - RemoveDDI = false; - if (RemoveDDI) - DDI->eraseFromParent(); + else if (CallInst *CI = dyn_cast<CallInst>(U)) { + // This is a call by-value or some other instruction that + // takes a pointer to the variable. Insert a *value* + // intrinsic that describes the alloca. + auto DbgVal = + DIB.insertDbgValueIntrinsic(AI, 0, + DIVariable(DDI->getVariable()), CI); + DbgVal->setDebugLoc(DDI->getDebugLoc()); + } + DDI->eraseFromParent(); } } return true; @@ -1072,12 +1079,11 @@ bool llvm::LowerDbgDeclare(Function &F) { /// alloca 'V', if any. 
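Typical callers pair this lookup with replaceDbgDeclareForAlloca (declared at the end of this hunk) when an alloca is rewritten. A hypothetical call site, assuming caller-side names OldAlloca, NewAddress and DIB, and assuming the trailing DIBuilder parameter from this revision's signature:

// Editorial sketch, not patch content: when a pass replaces an alloca, any
// dbg.declare describing it must be retargeted at the new address.
if (FindAllocaDbgDeclare(OldAlloca))
  replaceDbgDeclareForAlloca(OldAlloca, NewAddress, DIB);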
DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) - for (Value::use_iterator UI = DebugNode->use_begin(), - E = DebugNode->use_end(); UI != E; ++UI) - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + for (User *U : DebugNode->users()) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U)) return DDI; - return 0; + return nullptr; } bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 6d5f16c..ef42291 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -37,331 +37,72 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-simplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; +#define DEBUG_TYPE "loop-simplify" + STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); STATISTIC(NumNested , "Number of nested loops split out"); -namespace { - struct LoopSimplify : public LoopPass { - static char ID; // Pass identification, replacement for typeid - LoopSimplify() : LoopPass(ID) { - initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); - } - - // AA - If we have an alias analysis object to update, this is it, otherwise - // this is null. - AliasAnalysis *AA; - LoopInfo *LI; - DominatorTree *DT; - ScalarEvolution *SE; - Loop *L; - virtual bool runOnLoop(Loop *L, LPPassManager &LPM); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - // We need loop information to identify the loops... - AU.addRequired<DominatorTree>(); - AU.addPreserved<DominatorTree>(); - - AU.addRequired<LoopInfo>(); - AU.addPreserved<LoopInfo>(); - - AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<ScalarEvolution>(); - AU.addPreserved<DependenceAnalysis>(); - AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. - } - - /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. 
- void verifyAnalysis() const; - - private: - bool ProcessLoop(Loop *L, LPPassManager &LPM); - BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); - Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM, - BasicBlock *Preheader); - BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); - }; -} - -static void PlaceSplitBlockCarefully(BasicBlock *NewBB, - SmallVectorImpl<BasicBlock*> &SplitPreds, - Loop *L); - -char LoopSimplify::ID = 0; -INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", - "Canonicalize natural loops", true, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", - "Canonicalize natural loops", true, false) - -// Publicly exposed interface to pass... -char &llvm::LoopSimplifyID = LoopSimplify::ID; -Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } - -/// runOnLoop - Run down all loops in the CFG (recursively, but we could do -/// it in any convenient order) inserting preheaders... -/// -bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) { - L = l; - bool Changed = false; - LI = &getAnalysis<LoopInfo>(); - AA = getAnalysisIfAvailable<AliasAnalysis>(); - DT = &getAnalysis<DominatorTree>(); - SE = getAnalysisIfAvailable<ScalarEvolution>(); - - Changed |= ProcessLoop(L, LPM); - - return Changed; -} - -/// ProcessLoop - Walk the loop structure in depth first order, ensuring that -/// all loops have preheaders. -/// -bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) { - bool Changed = false; -ReprocessLoop: - - // Check to see that no blocks (other than the header) in this loop have - // predecessors that are not in the loop. This is not valid for natural - // loops, but can occur if the blocks are unreachable. Since they are - // unreachable we can just shamelessly delete those CFG edges! - for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); - BB != E; ++BB) { - if (*BB == L->getHeader()) continue; - - SmallPtrSet<BasicBlock*, 4> BadPreds; - for (pred_iterator PI = pred_begin(*BB), - PE = pred_end(*BB); PI != PE; ++PI) { - BasicBlock *P = *PI; - if (!L->contains(P)) - BadPreds.insert(P); - } - - // Delete each unique out-of-loop (and thus dead) predecessor. - for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(), - E = BadPreds.end(); I != E; ++I) { - - DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " - << (*I)->getName() << "\n"); - - // Inform each successor of each dead pred. - for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) - (*SI)->removePredecessor(*I); - // Zap the dead pred's terminator and replace it with unreachable. - TerminatorInst *TI = (*I)->getTerminator(); - TI->replaceAllUsesWith(UndefValue::get(TI->getType())); - (*I)->getTerminator()->eraseFromParent(); - new UnreachableInst((*I)->getContext(), *I); - Changed = true; - } - } - - // If there are exiting blocks with branches on undef, resolve the undef in - // the direction which will exit the loop. This will help simplify loop - // trip count computations. 
- SmallVector<BasicBlock*, 8> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), - E = ExitingBlocks.end(); I != E; ++I) - if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) - if (BI->isConditional()) { - if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { - - DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " - << (*I)->getName() << "\n"); - - BI->setCondition(ConstantInt::get(Cond->getType(), - !L->contains(BI->getSuccessor(0)))); - - // This may make the loop analyzable, force SCEV recomputation. - if (SE) - SE->forgetLoop(L); - - Changed = true; - } - } - - // Does the loop already have a preheader? If so, don't insert one. - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) { - Preheader = InsertPreheaderForLoop(L, this); - if (Preheader) { - ++NumInserted; - Changed = true; - } - } - - // Next, check to make sure that all exit nodes of the loop only have - // predecessors that are inside of the loop. This check guarantees that the - // loop preheader/header will dominate the exit blocks. If the exit block has - // predecessors from outside of the loop, split the edge now. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - - SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), - ExitBlocks.end()); - for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), - E = ExitBlockSet.end(); I != E; ++I) { - BasicBlock *ExitBlock = *I; - for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); - PI != PE; ++PI) - // Must be exactly this loop: no subloops, parent loops, or non-loop preds - // allowed. - if (!L->contains(*PI)) { - if (RewriteLoopExitBlock(L, ExitBlock)) { - ++NumInserted; - Changed = true; - } - break; - } - } - - // If the header has more than two predecessors at this point (from the - // preheader and from multiple backedges), we must adjust the loop. - BasicBlock *LoopLatch = L->getLoopLatch(); - if (!LoopLatch) { - // If this is really a nested loop, rip it out into a child loop. Don't do - // this for loops with a giant number of backedges, just factor them into a - // common backedge instead. - if (L->getNumBackEdges() < 8) { - if (SeparateNestedLoop(L, LPM, Preheader)) { - ++NumNested; - // This is a big restructuring change, reprocess the whole loop. - Changed = true; - // GCC doesn't tail recursion eliminate this. - goto ReprocessLoop; - } - } - - // If we either couldn't, or didn't want to, identify nesting of the loops, - // insert a new block that all backedges target, then make it jump to the - // loop header. - LoopLatch = InsertUniqueBackedgeBlock(L, Preheader); - if (LoopLatch) { - ++NumInserted; - Changed = true; - } +// If the block isn't already, move the new block to right after some 'outside +// block' block. This prevents the preheader from being placed inside the loop +// body, e.g. when the loop hasn't been rotated. +static void placeSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock *> &SplitPreds, + Loop *L) { + // Check to see if NewBB is already well placed. + Function::iterator BBI = NewBB; --BBI; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + if (&*BBI == SplitPreds[i]) + return; } - // Scan over the PHI nodes in the loop header. Since they now have only two - // incoming values (the loop is canonicalized), we may have simplified the PHI - // down to 'X = phi [X, Y]', which should be replaced with 'Y'. 
- PHINode *PN; - for (BasicBlock::iterator I = L->getHeader()->begin(); - (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { - if (AA) AA->deleteValue(PN); - if (SE) SE->forgetValue(PN); - PN->replaceAllUsesWith(V); - PN->eraseFromParent(); - } - - // If this loop has multiple exits and the exits all go to the same - // block, attempt to merge the exits. This helps several passes, such - // as LoopRotation, which do not support loops with multiple exits. - // SimplifyCFG also does this (and this code uses the same utility - // function), however this code is loop-aware, where SimplifyCFG is - // not. That gives it the advantage of being able to hoist - // loop-invariant instructions out of the way to open up more - // opportunities, and the disadvantage of having the responsibility - // to preserve dominator information. - bool UniqueExit = true; - if (!ExitBlocks.empty()) - for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i) - if (ExitBlocks[i] != ExitBlocks[0]) { - UniqueExit = false; - break; - } - if (UniqueExit) { - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - BasicBlock *ExitingBlock = ExitingBlocks[i]; - if (!ExitingBlock->getSinglePredecessor()) continue; - BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (!BI || !BI->isConditional()) continue; - CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); - if (!CI || CI->getParent() != ExitingBlock) continue; - - // Attempt to hoist out all instructions except for the - // comparison and the branch. - bool AllInvariant = true; - for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { - Instruction *Inst = I++; - // Skip debug info intrinsics. - if (isa<DbgInfoIntrinsic>(Inst)) - continue; - if (Inst == CI) - continue; - if (!L->makeLoopInvariant(Inst, Changed, - Preheader ? Preheader->getTerminator() : 0)) { - AllInvariant = false; - break; - } - } - if (!AllInvariant) continue; - - // The block has now been cleared of all instructions except for - // a comparison and a conditional branch. SimplifyCFG may be able - // to fold it now. - if (!FoldBranchToCommonDest(BI)) continue; - - // Success. The block is now dead, so remove it from the loop, - // update the dominator tree and delete it. - DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " - << ExitingBlock->getName() << "\n"); - - // If any reachable control flow within this loop has changed, notify - // ScalarEvolution. Currently assume the parent loop doesn't change - // (spliting edges doesn't count). If blocks, CFG edges, or other values - // in the parent loop change, then we need call to forgetLoop() for the - // parent instead. - if (SE) - SE->forgetLoop(L); - - assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); - Changed = true; - LI->removeBlock(ExitingBlock); - - DomTreeNode *Node = DT->getNode(ExitingBlock); - const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = - Node->getChildren(); - while (!Children.empty()) { - DomTreeNode *Child = Children.front(); - DT->changeImmediateDominator(Child, Node->getIDom()); - } - DT->eraseNode(ExitingBlock); + // If it isn't already after an outside block, move it after one. This is + // always good as it makes the uncond branch from the outside block into a + // fall-through. - BI->getSuccessor(0)->removePredecessor(ExitingBlock); - BI->getSuccessor(1)->removePredecessor(ExitingBlock); - ExitingBlock->eraseFromParent(); + // Figure out *which* outside block to put this after. 
Prefer an outside + // block that neighbors a BB actually in the loop. + BasicBlock *FoundBB = nullptr; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + Function::iterator BBI = SplitPreds[i]; + if (++BBI != NewBB->getParent()->end() && + L->contains(BBI)) { + FoundBB = SplitPreds[i]; + break; } } - return Changed; + // If our heuristic for a *good* bb to place this after doesn't find + // anything, just pick something. It's likely better than leaving it within + // the loop. + if (!FoundBB) + FoundBB = SplitPreds[0]; + NewBB->moveAfter(FoundBB); } /// InsertPreheaderForLoop - Once we discover that a loop doesn't have a @@ -380,7 +121,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { // If the loop is branched to from an indirect branch, we won't // be able to fully transform the loop, because it prohibits // edge splitting. - if (isa<IndirectBrInst>(P->getTerminator())) return 0; + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; // Keep track of it. OutsideBlocks.push_back(P); @@ -406,38 +147,39 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. - PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); + placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); return PreheaderBB; } -/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit -/// blocks. This method is used to split exit blocks that have predecessors -/// outside of the loop. -BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { +/// \brief Ensure that the loop preheader dominates all exit blocks. +/// +/// This method is used to split exit blocks that have predecessors outside of +/// the loop. +static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) { SmallVector<BasicBlock*, 8> LoopBlocks; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; if (L->contains(P)) { // Don't do this if the loop is exited via an indirect branch. - if (isa<IndirectBrInst>(P->getTerminator())) return 0; + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; LoopBlocks.push_back(P); } } assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); - BasicBlock *NewExitBB = 0; + BasicBlock *NewExitBB = nullptr; if (Exit->isLandingPad()) { SmallVector<BasicBlock*, 2> NewBBs; SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0], LoopBlocks.size()), ".loopexit", ".nonloopexit", - this, NewBBs); + PP, NewBBs); NewExitBB = NewBBs[0]; } else { - NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this); + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", PP); } DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " @@ -445,33 +187,33 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { return NewExitBB; } -/// AddBlockAndPredsToSet - Add the specified block, and all of its -/// predecessors, to the specified set, if it's not already in there. Stop -/// predecessor traversal when we reach StopBlock. -static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, +/// Add the specified block, and all of its predecessors, to the specified set, +/// if it's not already in there. Stop predecessor traversal when we reach +/// StopBlock. 
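The rewritten helper below also moves from a std::vector worklist to the SmallVector idiom, where pop_back_val combines back() and pop_back(). In general form (an editorial sketch, not patch content; Start and Visited are illustrative names):

// Editorial sketch, not patch content: the generic worklist walk shape.
std::set<BasicBlock *> Visited;
SmallVector<BasicBlock *, 8> Worklist;
Worklist.push_back(Start);
while (!Worklist.empty()) {
  BasicBlock *BB = Worklist.pop_back_val();  // back() + pop_back() in one call
  if (!Visited.insert(BB).second)
    continue;                                // already processed
  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI)
    Worklist.push_back(*PI);
}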
+static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, std::set<BasicBlock*> &Blocks) { - std::vector<BasicBlock *> WorkList; - WorkList.push_back(InputBB); + SmallVector<BasicBlock *, 8> Worklist; + Worklist.push_back(InputBB); do { - BasicBlock *BB = WorkList.back(); WorkList.pop_back(); + BasicBlock *BB = Worklist.pop_back_val(); if (Blocks.insert(BB).second && BB != StopBlock) // If BB is not already processed and it is not a stop block then // insert its predecessor in the work list for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { BasicBlock *WBB = *I; - WorkList.push_back(WBB); + Worklist.push_back(WBB); } - } while(!WorkList.empty()); + } while (!Worklist.empty()); } -/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a -/// PHI node that tells us how to partition the loops. -static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, - AliasAnalysis *AA, LoopInfo *LI) { +/// \brief The first part of loop-nestification is to find a PHI node that tells +/// us how to partition the loops. +static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, + DominatorTree *DT) { for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -486,49 +228,13 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, // We found something tasty to remove. return PN; } - return 0; + return nullptr; } -// PlaceSplitBlockCarefully - If the block isn't already, move the new block to -// right after some 'outside block' block. This prevents the preheader from -// being placed inside the loop body, e.g. when the loop hasn't been rotated. -void PlaceSplitBlockCarefully(BasicBlock *NewBB, - SmallVectorImpl<BasicBlock*> &SplitPreds, - Loop *L) { - // Check to see if NewBB is already well placed. - Function::iterator BBI = NewBB; --BBI; - for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { - if (&*BBI == SplitPreds[i]) - return; - } - - // If it isn't already after an outside block, move it after one. This is - // always good as it makes the uncond branch from the outside block into a - // fall-through. - - // Figure out *which* outside block to put this after. Prefer an outside - // block that neighbors a BB actually in the loop. - BasicBlock *FoundBB = 0; - for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { - Function::iterator BBI = SplitPreds[i]; - if (++BBI != NewBB->getParent()->end() && - L->contains(BBI)) { - FoundBB = SplitPreds[i]; - break; - } - } - - // If our heuristic for a *good* bb to place this after doesn't find - // anything, just pick something. It's likely better than leaving it within - // the loop. - if (!FoundBB) - FoundBB = SplitPreds[0]; - NewBB->moveAfter(FoundBB); -} - - -/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of -/// them out into a nested loop. This is important for code that looks like +/// \brief If this loop has multiple backedges, try to pull one of them out into +/// a nested loop. +/// +/// This is important for code that looks like /// this: /// /// Loop: @@ -544,18 +250,19 @@ void PlaceSplitBlockCarefully(BasicBlock *NewBB, /// If we are able to separate out a loop, return the new outer loop that was /// created. 
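In source terms, the shape being split is one header with two back edges, where the PHI that findPHIToPartitionLoops selects only varies on the edge belonging to the outer loop. A hypothetical C++ rendering, not patch content (innerCond is illustrative):

// Editorial sketch, not patch content: 'Header' has two backedges. The PHI
// for 'i' receives itself on the first edge (i is unchanged there), so that
// edge stays with the inner loop; the edge where i varies is pulled out to
// form the new outer loop.
bool innerCond(int);
void nest(int n) {
  int i = 0;
Header:
  if (innerCond(i))
    goto Header;   // inner backedge: i unchanged, PHI input == the PHI itself
  ++i;
  if (i < n)
    goto Header;   // outer backedge: i varies, peeled into the new outer loop
}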
/// -Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, - BasicBlock *Preheader) { +static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, + AliasAnalysis *AA, DominatorTree *DT, + LoopInfo *LI, ScalarEvolution *SE, Pass *PP) { // Don't try to separate loops without a preheader. if (!Preheader) - return 0; + return nullptr; // The header is not a landing pad; preheader insertion should ensure this. assert(!L->getHeader()->isLandingPad() && "Can't insert backedge to landing pad"); - PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI); - if (PN == 0) return 0; // No known way to partition. + PHINode *PN = findPHIToPartitionLoops(L, AA, DT); + if (!PN) return nullptr; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This // handles the case when a PHI node has multiple instances of itself as @@ -566,7 +273,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, !L->contains(PN->getIncomingBlock(i))) { // We can't split indirectbr edges. if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) - return 0; + return nullptr; OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } } @@ -580,11 +287,11 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = - SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this); + SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. - PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L); + placeSplitBlockCarefully(NewBB, OuterLoopPreds, L); // Create the new outer loop. Loop *NewOuter = new Loop(); @@ -598,9 +305,6 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, // L is now a subloop of our outer loop. NewOuter->addChildLoop(L); - // Add the new loop to the pass manager queue. - LPM.insertLoopIntoQueue(NewOuter); - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) NewOuter->addBlockEntry(*I); @@ -615,7 +319,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { BasicBlock *P = *PI; if (DT->dominates(Header, P)) - AddBlockAndPredsToSet(P, Header, BlocksInL); + addBlockAndPredsToSet(P, Header, BlocksInL); } // Scan all of the loop children of L, moving them to OuterLoop if they are @@ -643,15 +347,15 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, return NewOuter; } - - -/// InsertUniqueBackedgeBlock - This method is called when the specified loop -/// has more than one backedge in it. If this occurs, revector all of these -/// backedges to target a new basic block and have that block branch to the loop -/// header. This ensures that loops have exactly one backedge. +/// \brief This method is called when the specified loop has more than one +/// backedge in it. /// -BasicBlock * -LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { +/// If this occurs, revector all of these backedges to target a new basic block +/// and have that block branch to the loop header. This ensures that loops +/// have exactly one backedge. 
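The mechanical core of the function below, with the PHI merging and analysis updates it also performs omitted (an editorial sketch, not patch content):

// Editorial sketch, not patch content: funnel every backedge through one new
// block so the loop ends up with a single latch branching to the header.
BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
                                         Header->getName() + ".backedge",
                                         Header->getParent());
BranchInst::Create(Header, BEBlock);       // BEBlock's only job: jump to Header
for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
  TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
  for (unsigned Op = 0, NumOps = TI->getNumSuccessors(); Op != NumOps; ++Op)
    if (TI->getSuccessor(Op) == Header)
      TI->setSuccessor(Op, BEBlock);       // retarget the backedge
}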
+static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, + AliasAnalysis *AA, + DominatorTree *DT, LoopInfo *LI) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop @@ -660,7 +364,7 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // Unique backedge insertion currently depends on having a preheader. if (!Preheader) - return 0; + return nullptr; // The header is not a landing pad; preheader insertion should ensure this. assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); @@ -672,7 +376,7 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // Indirectbr edges cannot be split, so we must fail if we find one. if (isa<IndirectBrInst>(P->getTerminator())) - return 0; + return nullptr; if (P != Preheader) BackedgeBlocks.push_back(P); } @@ -701,7 +405,7 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // preheader over to the new PHI node. unsigned PreheaderIdx = ~0U; bool HasUniqueIncomingValue = true; - Value *UniqueValue = 0; + Value *UniqueValue = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *IBB = PN->getIncomingBlock(i); Value *IV = PN->getIncomingValue(i); @@ -710,7 +414,7 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { } else { NewPN->addIncoming(IV, IBB); if (HasUniqueIncomingValue) { - if (UniqueValue == 0) + if (!UniqueValue) UniqueValue = IV; else if (UniqueValue != IV) HasUniqueIncomingValue = false; @@ -762,7 +466,350 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { return BEBlock; } -void LoopSimplify::verifyAnalysis() const { +/// \brief Simplify one loop and queue further loops for simplification. +/// +/// FIXME: Currently this accepts both lots of analyses that it uses and a raw +/// Pass pointer. The Pass pointer is used by numerous utilities to update +/// specific analyses. Rather than a pass it would be much cleaner and more +/// explicit if they accepted the analysis directly and then updated it. +static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, + AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, Pass *PP, + const DataLayout *DL) { + bool Changed = false; +ReprocessLoop: + + // Check to see that no blocks (other than the header) in this loop have + // predecessors that are not in the loop. This is not valid for natural + // loops, but can occur if the blocks are unreachable. Since they are + // unreachable we can just shamelessly delete those CFG edges! + for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); + BB != E; ++BB) { + if (*BB == L->getHeader()) continue; + + SmallPtrSet<BasicBlock*, 4> BadPreds; + for (pred_iterator PI = pred_begin(*BB), + PE = pred_end(*BB); PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) + BadPreds.insert(P); + } + + // Delete each unique out-of-loop (and thus dead) predecessor. + for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(), + E = BadPreds.end(); I != E; ++I) { + + DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " + << (*I)->getName() << "\n"); + + // Inform each successor of each dead pred. + for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) + (*SI)->removePredecessor(*I); + // Zap the dead pred's terminator and replace it with unreachable. 
+ TerminatorInst *TI = (*I)->getTerminator(); + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + (*I)->getTerminator()->eraseFromParent(); + new UnreachableInst((*I)->getContext(), *I); + Changed = true; + } + } + + // If there are exiting blocks with branches on undef, resolve the undef in + // the direction which will exit the loop. This will help simplify loop + // trip count computations. + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), + E = ExitingBlocks.end(); I != E; ++I) + if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) + if (BI->isConditional()) { + if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { + + DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " + << (*I)->getName() << "\n"); + + BI->setCondition(ConstantInt::get(Cond->getType(), + !L->contains(BI->getSuccessor(0)))); + + // This may make the loop analyzable, force SCEV recomputation. + if (SE) + SE->forgetLoop(L); + + Changed = true; + } + } + + // Does the loop already have a preheader? If so, don't insert one. + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = InsertPreheaderForLoop(L, PP); + if (Preheader) { + ++NumInserted; + Changed = true; + } + } + + // Next, check to make sure that all exit nodes of the loop only have + // predecessors that are inside of the loop. This check guarantees that the + // loop preheader/header will dominate the exit blocks. If the exit block has + // predecessors from outside of the loop, split the edge now. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), + E = ExitBlockSet.end(); I != E; ++I) { + BasicBlock *ExitBlock = *I; + for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); + PI != PE; ++PI) + // Must be exactly this loop: no subloops, parent loops, or non-loop preds + // allowed. + if (!L->contains(*PI)) { + if (rewriteLoopExitBlock(L, ExitBlock, PP)) { + ++NumInserted; + Changed = true; + } + break; + } + } + + // If the header has more than two predecessors at this point (from the + // preheader and from multiple backedges), we must adjust the loop. + BasicBlock *LoopLatch = L->getLoopLatch(); + if (!LoopLatch) { + // If this is really a nested loop, rip it out into a child loop. Don't do + // this for loops with a giant number of backedges, just factor them into a + // common backedge instead. + if (L->getNumBackEdges() < 8) { + if (Loop *OuterL = separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP)) { + ++NumNested; + // Enqueue the outer loop as it should be processed next in our + // depth-first nest walk. + Worklist.push_back(OuterL); + + // This is a big restructuring change, reprocess the whole loop. + Changed = true; + // GCC doesn't tail recursion eliminate this. + // FIXME: It isn't clear we can't rely on LLVM to TRE this. + goto ReprocessLoop; + } + } + + // If we either couldn't, or didn't want to, identify nesting of the loops, + // insert a new block that all backedges target, then make it jump to the + // loop header. + LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI); + if (LoopLatch) { + ++NumInserted; + Changed = true; + } + } + + // Scan over the PHI nodes in the loop header. 
Since they now have only two + // incoming values (the loop is canonicalized), we may have simplified the PHI + // down to 'X = phi [X, Y]', which should be replaced with 'Y'. + PHINode *PN; + for (BasicBlock::iterator I = L->getHeader()->begin(); + (PN = dyn_cast<PHINode>(I++)); ) + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) { + if (AA) AA->deleteValue(PN); + if (SE) SE->forgetValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + } + + // If this loop has multiple exits and the exits all go to the same + // block, attempt to merge the exits. This helps several passes, such + // as LoopRotation, which do not support loops with multiple exits. + // SimplifyCFG also does this (and this code uses the same utility + // function), however this code is loop-aware, where SimplifyCFG is + // not. That gives it the advantage of being able to hoist + // loop-invariant instructions out of the way to open up more + // opportunities, and the disadvantage of having the responsibility + // to preserve dominator information. + bool UniqueExit = true; + if (!ExitBlocks.empty()) + for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i] != ExitBlocks[0]) { + UniqueExit = false; + break; + } + if (UniqueExit) { + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; + if (!ExitingBlock->getSinglePredecessor()) continue; + BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!BI || !BI->isConditional()) continue; + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + if (!CI || CI->getParent() != ExitingBlock) continue; + + // Attempt to hoist out all instructions except for the + // comparison and the branch. + bool AllInvariant = true; + bool AnyInvariant = false; + for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { + Instruction *Inst = I++; + // Skip debug info intrinsics. + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + if (Inst == CI) + continue; + if (!L->makeLoopInvariant(Inst, AnyInvariant, + Preheader ? Preheader->getTerminator() + : nullptr)) { + AllInvariant = false; + break; + } + } + if (AnyInvariant) { + Changed = true; + // The loop disposition of all SCEV expressions that depend on any + // hoisted values has also changed. + if (SE) + SE->forgetLoopDispositions(L); + } + if (!AllInvariant) continue; + + // The block has now been cleared of all instructions except for + // a comparison and a conditional branch. SimplifyCFG may be able + // to fold it now. + if (!FoldBranchToCommonDest(BI, DL)) continue; + + // Success. The block is now dead, so remove it from the loop, + // update the dominator tree and delete it. + DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " + << ExitingBlock->getName() << "\n"); + + // Notify ScalarEvolution before deleting this block. Currently we assume the + // parent loop doesn't change (splitting edges doesn't count). If blocks, + // CFG edges, or other values in the parent loop change, then we need to call + // forgetLoop() for the parent instead. 
+ if (SE) + SE->forgetLoop(L); + + assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); + Changed = true; + LI->removeBlock(ExitingBlock); + + DomTreeNode *Node = DT->getNode(ExitingBlock); + const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = + Node->getChildren(); + while (!Children.empty()) { + DomTreeNode *Child = Children.front(); + DT->changeImmediateDominator(Child, Node->getIDom()); + } + DT->eraseNode(ExitingBlock); + + BI->getSuccessor(0)->removePredecessor(ExitingBlock); + BI->getSuccessor(1)->removePredecessor(ExitingBlock); + ExitingBlock->eraseFromParent(); + } + } + + return Changed; +} + +bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, + AliasAnalysis *AA, ScalarEvolution *SE, + const DataLayout *DL) { + bool Changed = false; + + // Worklist maintains our depth-first queue of loops in this nest to process. + SmallVector<Loop *, 4> Worklist; + Worklist.push_back(L); + + // Walk the worklist from front to back, pushing newly found sub loops onto + // the back. This will let us process loops from back to front in depth-first + // order. We can use this simple process because loops form a tree. + for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { + Loop *L2 = Worklist[Idx]; + for (Loop::iterator I = L2->begin(), E = L2->end(); I != E; ++I) + Worklist.push_back(*I); + } + + while (!Worklist.empty()) + Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, + SE, PP, DL); + + return Changed; +} + +namespace { + struct LoopSimplify : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : FunctionPass(ID) { + initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); + } + + // AA - If we have an alias analysis object to update, this is it, otherwise + // this is null. + AliasAnalysis *AA; + DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; + const DataLayout *DL; + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + // We need loop information to identify the loops... + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + + AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<DependenceAnalysis>(); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + } + + /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. + void verifyAnalysis() const override; + }; +} + +char LoopSimplify::ID = 0; +INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", true, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", true, false) + +// Publicly exposed interface to pass... +char &llvm::LoopSimplifyID = LoopSimplify::ID; +Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } + +/// runOnFunction - Run down all loops in the CFG (recursively, but we could do +/// it in any convenient order) inserting preheaders... +/// +bool LoopSimplify::runOnFunction(Function &F) { + bool Changed = false; + AA = getAnalysisIfAvailable<AliasAnalysis>(); + LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? 
&DLP->getDataLayout() : nullptr; + + // Simplify each loop nest in the function. + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL); + + return Changed; +} + +// FIXME: Restore this code when we re-enable verification in verifyAnalysis +// below. +#if 0 +static void verifyLoop(Loop *L) { + // Verify subloops. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + verifyLoop(*I); + // It used to be possible to just assert L->isLoopSimplifyForm(), however // with the introduction of indirectbr, there are now cases where it's // not possible to transform a loop as necessary. We can at least check @@ -799,3 +846,15 @@ void LoopSimplify::verifyAnalysis() const { (void)HasIndBrExiting; } } +#endif + +void LoopSimplify::verifyAnalysis() const { + // FIXME: This routine is being called mid-way through the loop pass manager + // as loop passes destroy this analysis. That's actually fine, but we have no + // way of expressing that here. Once all of the passes that destroy this are + // hoisted out of the loop pass manager we can add back verification here. +#if 0 + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + verifyLoop(*I); +#endif +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 162807d..ab1c25a 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -16,22 +16,29 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; +#define DEBUG_TYPE "loop-unroll" + // TODO: Should these be here or in LoopUnroll? STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); @@ -58,18 +65,23 @@ static inline void RemapInstruction(Instruction *I, /// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it /// only has one predecessor, and that predecessor only has one successor. -/// The LoopInfo Analysis that is passed will be kept consistent. -/// Returns the new combined block. -static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, - LPPassManager *LPM) { +/// The LoopInfo Analysis that is passed will be kept consistent. If folding is +/// successful references to the containing loop must be removed from +/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have +/// references to the eliminated BB. The argument ForgottenLoops contains a set +/// of loops that have already been forgotten to prevent redundant, expensive +/// calls to ScalarEvolution::forgetLoop. Returns the new combined block. 
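The ForgottenLoops set mentioned in the comment above is a simple memoization: forgetLoop walks and discards cached SCEV state, so it should run at most once per loop no matter how many of that loop's blocks get folded. In isolation the pattern looks like this (a sketch with a hypothetical helper forgetOnce; note that in this vintage of LLVM, SmallPtrSet::insert returns a bool that is true only on first insertion, which is exactly what the hunk below relies on):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/BasicBlock.h"
    using namespace llvm;

    static void forgetOnce(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE,
                           SmallPtrSetImpl<Loop *> &ForgottenLoops) {
      if (Loop *L = LI->getLoopFor(BB))
        if (ForgottenLoops.insert(L)) // true only the first time we see L
          SE->forgetLoop(L);          // drop cached trip counts etc. for L
    }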
+static BasicBlock * +FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, + SmallPtrSetImpl<Loop *> &ForgottenLoops) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. BasicBlock *OnlyPred = BB->getSinglePredecessor(); - if (!OnlyPred) return 0; + if (!OnlyPred) return nullptr; if (OnlyPred->getTerminator()->getNumSuccessors() != 1) - return 0; + return nullptr; DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); @@ -98,8 +110,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, // ScalarEvolution holds references to loop exit blocks. if (LPM) { if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) { - if (Loop *L = LI->getLoopFor(BB)) - SE->forgetLoop(L); + if (Loop *L = LI->getLoopFor(BB)) { + if (ForgottenLoops.insert(L)) + SE->forgetLoop(L); + } } } LI->removeBlock(BB); @@ -137,10 +151,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, /// removed from the LoopPassManager as well. LPM can also be NULL. /// /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are -/// available it must also preserve those analyses. +/// available from the Pass it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, unsigned TripMultiple, - LoopInfo *LI, LPPassManager *LPM) { + LoopInfo *LI, Pass *PP, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -208,8 +222,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - if (LPM) { - ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (PP) { + ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); if (SE) SE->forgetLoop(L); } @@ -225,18 +239,35 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } + // Report the unrolling decision. + DebugLoc LoopLoc = L->getStartLoc(); + Function *F = Header->getParent(); + LLVMContext &Ctx = F->getContext(); + if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, + Twine("completely unrolled loop with ") + + Twine(TripCount) + " iterations"); } else { + auto EmitDiag = [&](const Twine &T) { + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, + "unrolled loop by a factor of " + Twine(Count) + + T); + }; + DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); + EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + EmitDiag(" with " + Twine(TripMultiple) + " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); + EmitDiag(" with run-time trip count"); } DEBUG(dbgs() << "!\n"); } @@ -400,23 +431,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, } // Merge adjacent basic blocks, if possible. 
+ SmallPtrSet<Loop *, 4> ForgottenLoops; for (unsigned i = 0, e = Latches.size(); i != e; ++i) { BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM, + ForgottenLoops)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } - if (LPM) { + DominatorTree *DT = nullptr; + if (PP) { // FIXME: Reconstruct dom info, because it is not preserved properly. // Incrementally updating domtree after loop unrolling would be easy. - if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>()) - DT->runOnFunction(*L->getHeader()->getParent()); + if (DominatorTreeWrapperPass *DTWP = + PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { + DT = &DTWP->getDomTree(); + DT->recalculate(*L->getHeader()->getParent()); + } // Simplify any new induction variables in the partially unrolled loop. - ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); if (SE && !CompletelyUnroll) { SmallVector<WeakVH, 16> DeadInsts; simplifyLoopIVs(L, SE, LPM, DeadInsts); @@ -449,9 +486,36 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; + + Loop *OuterL = L->getParentLoop(); // Remove the loop from the LoopPassManager if it's completely removed. - if (CompletelyUnroll && LPM != NULL) + if (CompletelyUnroll && LPM != nullptr) LPM->deleteLoopFromQueue(L); + // If we have a pass and a DominatorTree we should re-simplify impacted loops + // to ensure subsequent analyses can rely on this form. We want to simplify + // at least one layer outside of the loop that was unrolled so that any + // changes to the parent loop exposed by the unrolling are considered. + if (PP && DT) { + if (!OuterL && !CompletelyUnroll) + OuterL = L; + if (OuterL) { + DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>(); + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; + ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); + simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL); + + // LCSSA must be performed on the outermost affected loop. The unrolled + // loop's last loop latch is guaranteed to be in the outermost loop after + // deleteLoopFromQueue updates LoopInfo. 
+ Loop *LatchLoop = LI->getLoopFor(Latches.back()); + if (!OuterL->contains(LatchLoop)) + while (OuterL->getParentLoop() != LatchLoop) + OuterL = OuterL->getParentLoop(); + + formLCSSARecursively(*OuterL, *DT, SE); + } + } + return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index d801d5f..a96c46a 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -21,7 +21,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopIterator.h" @@ -37,6 +36,8 @@ using namespace llvm; +#define DEBUG_TYPE "loop-unroll" + STATISTIC(NumRuntimeUnrolled, "Number of loops unrolled with run-time trip counts"); @@ -58,7 +59,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, BasicBlock *OrigPH, BasicBlock *NewPH, ValueToValueMapTy &LVMap, Pass *P) { BasicBlock *Latch = L->getLoopLatch(); - assert(Latch != 0 && "Loop must have a latch"); + assert(Latch && "Loop must have a latch"); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). @@ -110,7 +111,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, ConstantInt::get(TripCount->getType(), Count)); BasicBlock *Exit = L->getUniqueExitBlock(); - assert(Exit != 0 && "Loop must have a single exit block only"); + assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); if (!Exit->isLandingPad()) { @@ -232,7 +233,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, // Make sure the loop is in canonical form, and there is a single // exit block only. - if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0) + if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock()) return false; // Use Scalar Evolution to compute the trip count. This allows more @@ -240,7 +241,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, if (!LPM) return false; ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); - if (SE == 0) + if (!SE) return false; // Only unroll loops with a computable trip count and the trip count needs @@ -279,17 +280,17 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, SCEVExpander Expander(*SE, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); - Type *CountTy = TripCount->getType(); - BinaryOperator *ModVal = - BinaryOperator::CreateURem(TripCount, - ConstantInt::get(CountTy, Count), - "xtraiter"); - ModVal->insertBefore(PreHeaderBR); - - // Check if for no extra iterations, then jump to unrolled loop - Value *BranchVal = new ICmpInst(PreHeaderBR, - ICmpInst::ICMP_NE, ModVal, - ConstantInt::get(CountTy, 0), "lcmp"); + + IRBuilder<> B(PreHeaderBR); + Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); + + // Check if for no extra iterations, then jump to unrolled loop. We have to + // check that the trip count computation didn't overflow when adding one to + // the backedge taken count. 
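In scalar terms the guard being built here is: with Count a power of two, TripCount & (Count - 1) equals TripCount % Count, and a TripCount of zero means the BECount + 1 computation wrapped around, so the prolog must run in that case as well. A plain C++ model of the condition (a sketch of the logic only; the hunk emits the same thing as IR through IRBuilder):

    #include <cstdint>

    // True when the prolog copy of the loop body must execute.
    bool needsProlog(uint64_t TripCount, uint64_t Count) {
      uint64_t Xtra = TripCount & (Count - 1); // TripCount % Count, Count = 2^k
      bool Overflow = (TripCount == 0);        // BECount + 1 wrapped to zero
      return Overflow || Xtra != 0;
    }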
+ Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod"); + Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow"); + Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or"); + // Branch to either the extra iterations or the unrolled loop // We will fix up the true branch label when adding loop body copies BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR); @@ -301,7 +302,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, ValueToValueMapTy LVMap; Function *F = Header->getParent(); // These variables are used to update the CFG links in each iteration - BasicBlock *CompareBB = 0; + BasicBlock *CompareBB = nullptr; BasicBlock *LastLoopBB = PH; // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog code @@ -343,6 +344,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, } // The comparison w/ the extra iteration value and branch + Type *CountTy = TripCount->getType(); Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal, ConstantInt::get(CountTy, leftOverIters), "un.tmp"); diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index e017f50..ff89e74 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lower-expect-intrinsic" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/BasicBlock.h" @@ -29,6 +28,8 @@ using namespace llvm; +#define DEBUG_TYPE "lower-expect-intrinsic" + STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled"); static cl::opt<uint32_t> @@ -52,7 +53,7 @@ namespace { initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; }; } @@ -94,15 +95,25 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { return false; // Handle non-optimized IR code like: - // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1) + // %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1) // %tobool = icmp ne i64 %expval, 0 // br i1 %tobool, label %if.then, label %if.end + // + // Or the following simpler case: + // %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1) + // br i1 %expval, label %if.then, label %if.end + + CallInst *CI; ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition()); - if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE) - return false; + if (!CmpI) { + CI = dyn_cast<CallInst>(BI->getCondition()); + } else { + if (CmpI->getPredicate() != CmpInst::ICMP_NE) + return false; + CI = dyn_cast<CallInst>(CmpI->getOperand(0)); + } - CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0)); if (!CI) return false; @@ -127,7 +138,10 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { BI->setMetadata(LLVMContext::MD_prof, Node); - CmpI->setOperand(0, ArgValue); + if (CmpI) + CmpI->setOperand(0, ArgValue); + else + BI->setCondition(ArgValue); return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index 9799a30..66d57b0 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -1,4 +1,4 @@ -//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===// 
+//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===// // // The LLVM Compiler Infrastructure // @@ -8,94 +8,34 @@ //===----------------------------------------------------------------------===// // // This transformation is designed for use by code generators which do not yet -// support stack unwinding. This pass supports two models of exception handling -// lowering, the 'cheap' support and the 'expensive' support. -// -// 'Cheap' exception handling support gives the program the ability to execute -// any program which does not "throw an exception", by turning 'invoke' -// instructions into calls and by turning 'unwind' instructions into calls to -// abort(). If the program does dynamically use the unwind instruction, the -// program will print a message then abort. -// -// 'Expensive' exception handling support gives the full exception handling -// support to the program at the cost of making the 'invoke' instruction -// really expensive. It basically inserts setjmp/longjmp calls to emulate the -// exception handling as necessary. -// -// Because the 'expensive' support slows down programs a lot, and EH is only -// used for a subset of the programs, it must be specifically enabled by an -// option. -// -// Note that after this pass runs the CFG is not entirely accurate (exceptional -// control flow edges are not correct anymore) so only very simple things should -// be done after the lowerinvoke pass has run (like generation of native code). -// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't -// support the invoke instruction yet" lowering pass. +// support stack unwinding. This pass converts 'invoke' instructions to 'call' +// instructions, so that any exception-handling 'landingpad' blocks become dead +// code (which can be removed by running the '-simplifycfg' pass afterwards). // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lowerinvoke" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include <csetjmp> -#include <set> using namespace llvm; -STATISTIC(NumInvokes, "Number of invokes replaced"); -STATISTIC(NumSpilled, "Number of registers live across unwind edges"); +#define DEBUG_TYPE "lowerinvoke" -static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support", - cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code")); +STATISTIC(NumInvokes, "Number of invokes replaced"); namespace { class LowerInvoke : public FunctionPass { - const TargetMachine *TM; - - // Used for both models. - Constant *AbortFn; - - // Used for expensive EH support. 
- StructType *JBLinkTy; - GlobalVariable *JBListHead; - Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn; - bool useExpensiveEHSupport; - public: static char ID; // Pass identification, replacement for typeid - explicit LowerInvoke(const TargetMachine *TM = 0, - bool useExpensiveEHSupport = ExpensiveEHSupport) - : FunctionPass(ID), TM(TM), - useExpensiveEHSupport(useExpensiveEHSupport) { + explicit LowerInvoke() : FunctionPass(ID) { initializeLowerInvokePass(*PassRegistry::getPassRegistry()); } - bool doInitialization(Module &M); - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - // This is a cluster of orthogonal Transforms - AU.addPreserved("mem2reg"); - AU.addPreservedID(LowerSwitchID); - } - - private: - bool insertCheapEHSupport(Function &F); - void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes); - void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, - AllocaInst *InvokeNum, AllocaInst *StackPtr, - SwitchInst *CatchSwitch); - bool insertExpensiveEHSupport(Function &F); + bool runOnFunction(Function &F) override; }; } @@ -107,65 +47,11 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke", char &llvm::LowerInvokePassID = LowerInvoke::ID; // Public Interface To the LowerInvoke pass. -FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM, - bool useExpensiveEHSupport) { - return new LowerInvoke(TM, useExpensiveEHSupport || ExpensiveEHSupport); +FunctionPass *llvm::createLowerInvokePass() { + return new LowerInvoke(); } -// doInitialization - Make sure that there is a prototype for abort in the -// current module. -bool LowerInvoke::doInitialization(Module &M) { - Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - if (useExpensiveEHSupport) { - // Insert a type for the linked list of jump buffers. - const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0; - unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; - JBSize = JBSize ? JBSize : 200; - Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); - - JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty"); - Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) }; - JBLinkTy->setBody(Elts); - - Type *PtrJBList = PointerType::getUnqual(JBLinkTy); - - // Now that we've done that, insert the jmpbuf list head global, unless it - // already exists. - if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) { - JBListHead = new GlobalVariable(M, PtrJBList, false, - GlobalValue::LinkOnceAnyLinkage, - Constant::getNullValue(PtrJBList), - "llvm.sjljeh.jblist"); - } - -// VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) && \ - !defined(setjmp_undefined_for_msvc) -# pragma push_macro("setjmp") -# undef setjmp -# define setjmp_undefined_for_msvc -#endif - - SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp); - -#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) - // let's return it to _setjmp state -# pragma pop_macro("setjmp") -# undef setjmp_undefined_for_msvc -#endif - - LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp); - StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); - StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); - } - - // We need the 'write' and 'abort' functions for both models. 
- AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()), - (Type *)0); - return true; -} - -bool LowerInvoke::insertCheapEHSupport(Function &F) { +bool LowerInvoke::runOnFunction(Function &F) { bool Changed = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { @@ -192,388 +78,3 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { } return Changed; } - -/// rewriteExpensiveInvoke - Insert code and hack the function to replace the -/// specified invoke instruction with a call. -void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, - AllocaInst *InvokeNum, - AllocaInst *StackPtr, - SwitchInst *CatchSwitch) { - ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo); - - // If the unwind edge has phi nodes, split the edge. - if (isa<PHINode>(II->getUnwindDest()->begin())) { - SplitCriticalEdge(II, 1, this); - - // If there are any phi nodes left, they must have a single predecessor. - while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) { - PN->replaceAllUsesWith(PN->getIncomingValue(0)); - PN->eraseFromParent(); - } - } - - // Insert a store of the invoke num before the invoke and store zero into the - // location afterward. - new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile - - // Insert a store of the stack ptr before the invoke, so we can restore it - // later in the exception case. - CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II); - new StoreInst(StackSaveRet, StackPtr, true, II); // volatile - - BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt(); - // nonvolatile. - new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), - InvokeNum, false, NI); - - Instruction* StackPtrLoad = - new LoadInst(StackPtr, "stackptr.restore", true, - II->getUnwindDest()->getFirstInsertionPt()); - CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad); - - // Add a switch case to our unwind block. - CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); - - // Insert a normal call instruction. - SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), - CallArgs, "", II); - NewCall->takeName(II); - NewCall->setCallingConv(II->getCallingConv()); - NewCall->setAttributes(II->getAttributes()); - NewCall->setDebugLoc(II->getDebugLoc()); - II->replaceAllUsesWith(NewCall); - - // Replace the invoke with an uncond branch. - BranchInst::Create(II->getNormalDest(), NewCall->getParent()); - II->eraseFromParent(); -} - -/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until -/// we reach blocks we've already seen. -static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { - if (!LiveBBs.insert(BB).second) return; // already been here. - - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - MarkBlocksLiveIn(*PI, LiveBBs); -} - -// First thing we need to do is scan the whole function for values that are -// live across unwind edges. Each value that is live across an unwind edge -// we spill into a stack location, guaranteeing that there is nothing live -// across the unwind edge. This process also splits all critical edges -// coming out of invoke's. -void LowerInvoke:: -splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { - // First step, split all critical edges from invoke instructions. 
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - InvokeInst *II = Invokes[i]; - SplitCriticalEdge(II, 0, this); - SplitCriticalEdge(II, 1, this); - assert(!isa<PHINode>(II->getNormalDest()) && - !isa<PHINode>(II->getUnwindDest()) && - "critical edge splitting left single entry phi nodes?"); - } - - Function *F = Invokes.back()->getParent()->getParent(); - - // To avoid having to handle incoming arguments specially, we lower each arg - // to a copy instruction in the entry block. This ensures that the argument - // value itself cannot be live across the entry block. - BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); - while (isa<AllocaInst>(AfterAllocaInsertPt) && - isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) - ++AfterAllocaInsertPt; - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); - AI != E; ++AI) { - Type *Ty = AI->getType(); - // Aggregate types can't be cast, but are legal argument types, so we have - // to handle them differently. We use an extract/insert pair as a - // lightweight method to achieve the same goal. - if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { - Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); - Instruction *NI = InsertValueInst::Create(AI, EI, 0); - NI->insertAfter(EI); - AI->replaceAllUsesWith(NI); - // Set the operand of the instructions back to the AllocaInst. - EI->setOperand(0, AI); - NI->setOperand(0, AI); - } else { - // This is always a no-op cast because we're casting AI to AI->getType() - // so src and destination types are identical. BitCast is the only - // possibility. - CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Set the operand of the cast instruction back to the AllocaInst. - // Normally it's forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, - // we're replacing it here with the same value it was constructed with. - // We do this because the above replaceAllUsesWith() clobbered the - // operand, but we want this one to remain. - NC->setOperand(0, AI); - } - } - - // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - // Ignore obvious cases we don't have to handle. In particular, most - // instructions either have no uses or only have a single use inside the - // current block. Ignore them quickly. - Instruction *Inst = II; - if (Inst->use_empty()) continue; - if (Inst->hasOneUse() && - cast<Instruction>(Inst->use_back())->getParent() == BB && - !isa<PHINode>(Inst->use_back())) continue; - - // If this is an alloca in the entry block, it's not a real register - // value. - if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) - if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) - continue; - - // Avoid iterator invalidation by copying users to a temporary vector. - SmallVector<Instruction*,16> Users; - for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); - if (User->getParent() != BB || isa<PHINode>(User)) - Users.push_back(User); - } - - // Scan all of the uses and see if the live range is live across an unwind - // edge. If we find a use live across an invoke edge, create an alloca - // and spill the value. 
- - // Find all of the blocks that this value is live in. - std::set<BasicBlock*> LiveBBs; - LiveBBs.insert(Inst->getParent()); - while (!Users.empty()) { - Instruction *U = Users.back(); - Users.pop_back(); - - if (!isa<PHINode>(U)) { - MarkBlocksLiveIn(U->getParent(), LiveBBs); - } else { - // Uses for a PHI node occur in their predecessor block. - PHINode *PN = cast<PHINode>(U); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Inst) - MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); - } - } - - // Now that we know all of the blocks that this thing is live in, see if - // it includes any of the unwind locations. - bool NeedsSpill = false; - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); - if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { - NeedsSpill = true; - } - } - - // If we decided we need a spill, do it. - if (NeedsSpill) { - ++NumSpilled; - DemoteRegToStack(*Inst, true); - } - } -} - -bool LowerInvoke::insertExpensiveEHSupport(Function &F) { - SmallVector<ReturnInst*,16> Returns; - SmallVector<InvokeInst*,16> Invokes; - UnreachableInst* UnreachablePlaceholder = 0; - - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { - // Remember all return instructions in case we insert an invoke into this - // function. - Returns.push_back(RI); - } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { - Invokes.push_back(II); - } - - if (Invokes.empty()) return false; - - NumInvokes += Invokes.size(); - - // TODO: This is not an optimal way to do this. In particular, this always - // inserts setjmp calls into the entries of functions with invoke instructions - // even though there are possibly paths through the function that do not - // execute any invokes. In particular, for functions with early exits, e.g. - // the 'addMove' method in hexxagon, it would be nice to not have to do the - // setjmp stuff on the early exit path. This requires a bit of dataflow, but - // would not be too hard to do. - - // If we have an invoke instruction, insert a setjmp that dominates all - // invokes. After the setjmp, use a cond branch that goes to the original - // code path on zero, and to a designated 'catch' block of nonzero. - Value *OldJmpBufPtr = 0; - if (!Invokes.empty()) { - // First thing we need to do is scan the whole function for values that are - // live across unwind edges. Each value that is live across an unwind edge - // we spill into a stack location, guaranteeing that there is nothing live - // across the unwind edge. This process also splits all critical edges - // coming out of invoke's. - splitLiveRangesLiveAcrossInvokes(Invokes); - - BasicBlock *EntryBB = F.begin(); - - // Create an alloca for the incoming jump buffer ptr and the new jump buffer - // that needs to be restored on all exits from the function. This is an - // alloca because the value needs to be live across invokes. - const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0; - unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; - AllocaInst *JmpBuf = - new AllocaInst(JBLinkTy, 0, Align, - "jblink", F.begin()->begin()); - - Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), - ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) }; - OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf", - EntryBB->getTerminator()); - - // Copy the JBListHead to the alloca. 
- Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, - EntryBB->getTerminator()); - new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator()); - - // Add the new jumpbuf to the list. - new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator()); - - // Create the catch block. The catch block is basically a big switch - // statement that goes to all of the invoke catch blocks. - BasicBlock *CatchBB = - BasicBlock::Create(F.getContext(), "setjmp.catch", &F); - - // Create an alloca which keeps track of the stack pointer before every - // invoke, this allows us to properly restore the stack pointer after - // long jumping. - AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0, - "stackptr", EntryBB->begin()); - - // Create an alloca which keeps track of which invoke is currently - // executing. For normal calls it contains zero. - AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0, - "invokenum",EntryBB->begin()); - new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), - InvokeNum, true, EntryBB->getTerminator()); - - // Insert a load in the Catch block, and a switch on its value. By default, - // we go to a block that just does an unwind (which is the correct action - // for a standard call). We insert an unreachable instruction here and - // modify the block to jump to the correct unwinding pad later. - BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F); - UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB); - - Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); - SwitchInst *CatchSwitch = - SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB); - - // Now that things are set up, insert the setjmp call itself. - - // Split the entry block to insert the conditional branch for the setjmp. - BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), - "setjmp.cont"); - - Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); - Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf", - EntryBB->getTerminator()); - JmpBufPtr = new BitCastInst(JmpBufPtr, - Type::getInt8PtrTy(F.getContext()), - "tmp", EntryBB->getTerminator()); - Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret", - EntryBB->getTerminator()); - - // Compare the return value to zero. - Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), - ICmpInst::ICMP_EQ, SJRet, - Constant::getNullValue(SJRet->getType()), - "notunwind"); - // Nuke the uncond branch. - EntryBB->getTerminator()->eraseFromParent(); - - // Put in a new condbranch in its place. - BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB); - - // At this point, we are all set up, rewrite each invoke instruction. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) - rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch); - } - - // We know that there is at least one unwind. - - // Create three new blocks, the block to load the jmpbuf ptr and compare - // against null, the block to do the longjmp, and the error block for if it - // is null. Add them at the end of the function because they are not hot. - BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(), - "dounwind", &F); - BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F); - BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F); - - // If this function contains an invoke, restore the old jumpbuf ptr. 
- Value *BufPtr; - if (OldJmpBufPtr) { - // Before the return, insert a copy from the saved value to the new value. - BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler); - new StoreInst(BufPtr, JBListHead, UnwindHandler); - } else { - BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler); - } - - // Load the JBList, if it's null, then there was no catch! - Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr, - Constant::getNullValue(BufPtr->getType()), - "notnull"); - BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler); - - // Create the block to do the longjmp. - // Get a pointer to the jmpbuf and longjmp. - Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), - ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) }; - Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock); - Idx[0] = new BitCastInst(Idx[0], - Type::getInt8PtrTy(F.getContext()), - "tmp", UnwindBlock); - Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); - CallInst::Create(LongJmpFn, Idx, "", UnwindBlock); - new UnreachableInst(F.getContext(), UnwindBlock); - - // Set up the term block ("throw without a catch"). - new UnreachableInst(F.getContext(), TermBlock); - - // Insert a call to abort() - CallInst::Create(AbortFn, "", - TermBlock->getTerminator())->setTailCall(); - - // Replace the inserted unreachable with a branch to the unwind handler. - if (UnreachablePlaceholder) { - BranchInst::Create(UnwindHandler, UnreachablePlaceholder); - UnreachablePlaceholder->eraseFromParent(); - } - - // Finally, for any returns from this function, if this function contains an - // invoke, restore the old jmpbuf pointer to its input value. - if (OldJmpBufPtr) { - for (unsigned i = 0, e = Returns.size(); i != e; ++i) { - ReturnInst *R = Returns[i]; - - // Before the return, insert a copy from the saved value to the new value. - Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R); - new StoreInst(OldBuf, JBListHead, true, R); - } - } - - return true; -} - -bool LowerInvoke::runOnFunction(Function &F) { - if (useExpensiveEHSupport) - return insertExpensiveEHSupport(F); - else - return insertCheapEHSupport(F); -} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 2d2a8a5..d6e5bb6 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -14,11 +14,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/CFG.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -27,6 +29,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "lower-switch" + namespace { /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch /// instructions. 
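This revision teaches switchConvert to carry the lower and upper bounds already established by the enclosing comparisons, so a leaf whose range is fully pinned down needs no compare at all, and an unreachable default lets the bounds start out tight. The emitted structure is a balanced binary search over the case ranges; in C++ terms, lowering switch (x) { case 1: ... case 2: ... case 4: ... case 5: ... } produces roughly this shape (an illustrative sketch, not literal output of the pass):

    int dispatch(int x) {
      if (x < 4) {               // NodeBlock: signed compare against the pivot
        if (x == 1) return 10;   // LeafBlock
        if (x == 2) return 20;   // LeafBlock
      } else {
        if (x == 4) return 40;
        if (x == 5) return 50;
      }
      return 0;                  // NewDefault
    }

With the new bounds tracking, a leaf whose value is already implied by the enclosing pivot comparisons branches straight to its case block instead of re-testing, as the hunks below implement.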
@@ -37,9 +41,9 @@ namespace { initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { // This is a cluster of orthogonal Transforms AU.addPreserved<UnifyFunctionExitNodes>(); AU.addPreserved("mem2reg"); @@ -51,20 +55,23 @@ namespace { Constant* High; BasicBlock* BB; - CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) : + CaseRange(Constant *low = nullptr, Constant *high = nullptr, + BasicBlock *bb = nullptr) : Low(low), High(high), BB(bb) { } }; - typedef std::vector<CaseRange> CaseVector; + typedef std::vector<CaseRange> CaseVector; typedef std::vector<CaseRange>::iterator CaseItr; private: void processSwitchInst(SwitchInst *SI); - BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val, - BasicBlock* OrigBlock, BasicBlock* Default); - BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val, - BasicBlock* OrigBlock, BasicBlock* Default); - unsigned Clusterify(CaseVector& Cases, SwitchInst *SI); + BasicBlock *switchConvert(CaseItr Begin, CaseItr End, + ConstantInt *LowerBound, ConstantInt *UpperBound, + Value *Val, BasicBlock *Predecessor, + BasicBlock *OrigBlock, BasicBlock *Default); + BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock, + BasicBlock *Default); + unsigned Clusterify(CaseVector &Cases, SwitchInst *SI); }; /// The comparison function for sorting the switch case values in the vector. @@ -124,17 +131,45 @@ static raw_ostream& operator<<(raw_ostream &O, return O << "]"; } +static void fixPhis(BasicBlock *Succ, + BasicBlock *OrigBlock, + BasicBlock *NewNode) { + for (BasicBlock::iterator I = Succ->begin(), + E = Succ->getFirstNonPHI(); + I != E; ++I) { + PHINode *PN = cast<PHINode>(I); + + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + if (PN->getIncomingBlock(I) == OrigBlock) + PN->setIncomingBlock(I, NewNode); + } + } +} + // switchConvert - Convert the switch statement into a binary lookup of // the case values. The function recursively builds this tree. -// -BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, - Value* Val, BasicBlock* OrigBlock, - BasicBlock* Default) -{ +// LowerBound and UpperBound are used to keep track of the bounds for Val +// that have already been checked by a block emitted by one of the previous +// calls to switchConvert in the call stack. +BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, + ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, + BasicBlock *OrigBlock, + BasicBlock *Default) { unsigned Size = End - Begin; - if (Size == 1) + if (Size == 1) { + // Check if the Case Range is perfectly squeezed in between + // already checked Upper and Lower bounds. If it is then we can avoid + // emitting the code that checks if the value actually falls in the range + // because the bounds already tell us so. 
+ if (Begin->Low == LowerBound && Begin->High == UpperBound) { + fixPhis(Begin->BB, OrigBlock, Predecessor); + return Begin->BB; + } return newLeafBlock(*Begin, Val, OrigBlock, Default); + } unsigned Mid = Size / 2; std::vector<CaseRange> LHS(Begin, Begin + Mid); @@ -142,26 +177,65 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, std::vector<CaseRange> RHS(Begin + Mid, End); DEBUG(dbgs() << "RHS: " << RHS << "\n"); - CaseRange& Pivot = *(Begin + Mid); - DEBUG(dbgs() << "Pivot ==> " - << cast<ConstantInt>(Pivot.Low)->getValue() << " -" - << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); + CaseRange &Pivot = *(Begin + Mid); + DEBUG(dbgs() << "Pivot ==> " + << cast<ConstantInt>(Pivot.Low)->getValue() + << " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); + + // NewLowerBound here should never be the integer minimal value. + // This is because it is computed from a case range that is never + // the smallest, so there is always a case range that has at least + // a smaller value. + ConstantInt *NewLowerBound = cast<ConstantInt>(Pivot.Low); + ConstantInt *NewUpperBound; + + // If we don't have a Default block then it means that we can never + // have a value outside of a case range, so set the UpperBound to the highest + // value in the LHS part of the case ranges. + if (Default != nullptr) { + // Because NewLowerBound is never the smallest representable integer + // it is safe here to subtract one. + NewUpperBound = ConstantInt::get(NewLowerBound->getContext(), + NewLowerBound->getValue() - 1); + } else { + CaseItr LastLHS = LHS.begin() + LHS.size() - 1; + NewUpperBound = cast<ConstantInt>(LastLHS->High); + } - BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val, - OrigBlock, Default); - BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val, - OrigBlock, Default); + DEBUG(dbgs() << "LHS Bounds ==> "; + if (LowerBound) { + dbgs() << cast<ConstantInt>(LowerBound)->getSExtValue(); + } else { + dbgs() << "NONE"; + } + dbgs() << " - " << NewUpperBound->getSExtValue() << "\n"; + dbgs() << "RHS Bounds ==> "; + dbgs() << NewLowerBound->getSExtValue() << " - "; + if (UpperBound) { + dbgs() << cast<ConstantInt>(UpperBound)->getSExtValue() << "\n"; + } else { + dbgs() << "NONE\n"; + }); // Create a new node that checks if the value is < pivot. Go to the // left branch if it is and right branch if not. Function* F = OrigBlock->getParent(); BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); - Function::iterator FI = OrigBlock; - F->getBasicBlockList().insert(++FI, NewNode); ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); + + BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound, + NewUpperBound, Val, NewNode, OrigBlock, + Default); + BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound, + UpperBound, Val, NewNode, OrigBlock, + Default); + + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewNode); NewNode->getInstList().push_back(Comp); + BranchInst::Create(LBranch, RBranch, Comp, NewNode); return NewNode; } @@ -182,7 +256,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, F->getBasicBlockList().insert(++FI, NewLeaf); // Emit comparison - ICmpInst* Comp = NULL; + ICmpInst* Comp = nullptr; if (Leaf.Low == Leaf.High) { // Make the seteq instruction... 
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, @@ -245,7 +319,8 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { // Merge case into clusters if (Cases.size()>=2) - for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) { + for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); + J != Cases.end();) { int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue(); int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue(); BasicBlock* nextBB = J->BB; @@ -287,13 +362,19 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { return; } + const bool DefaultIsUnreachable = + Default->size() == 1 && isa<UnreachableInst>(Default->getTerminator()); // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. - BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); - F->getBasicBlockList().insert(Default, NewDefault); - - BranchInst::Create(Default, NewDefault); - + // If the default block is set as unreachable we avoid creating one + // because it will never be a valid target. + BasicBlock *NewDefault = nullptr; + if (!DefaultIsUnreachable) { + NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); + F->getBasicBlockList().insert(Default, NewDefault); + + BranchInst::Create(Default, NewDefault); + } // If there is an entry in any PHI nodes for the default edge, make sure // to update them as well. for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) { @@ -312,12 +393,31 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { DEBUG(dbgs() << "Cases: " << Cases << "\n"); (void)numCmps; - BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val, - OrigBlock, NewDefault); + ConstantInt *UpperBound = nullptr; + ConstantInt *LowerBound = nullptr; + + // Optimize the condition where Default is an unreachable block. In this case + // we can make the bounds tightly fitted around the case value ranges, + // because we know that the value passed to the switch should always be + // exactly one of the case values. + if (DefaultIsUnreachable) { + CaseItr LastCase = Cases.begin() + Cases.size() - 1; + UpperBound = cast<ConstantInt>(LastCase->High); + LowerBound = cast<ConstantInt>(Cases.begin()->Low); + } + BasicBlock *SwitchBlock = + switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, + OrigBlock, OrigBlock, NewDefault); // Branch to our shiny new if-then stuff... BranchInst::Create(SwitchBlock, OrigBlock); // We are now done with the switch instruction, delete it.
CurBlock->getInstList().erase(SI); + + pred_iterator PI = pred_begin(Default), E = pred_end(Default); + // If the Default block has no more predecessors just remove it + if (PI == E) { + DeleteDeadBlock(Default); + } } diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp index 61b3965..189caa7 100644 --- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -12,16 +12,17 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" using namespace llvm; +#define DEBUG_TYPE "mem2reg" + STATISTIC(NumPromoted, "Number of alloca's promoted"); namespace { @@ -34,10 +35,10 @@ namespace { // runOnFunction - To run this pass, first we calculate the alloca // instructions that are safe for promotion, then we promote each one. // - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); AU.setPreservesCFG(); // This is a cluster of orthogonal Transforms AU.addPreserved<UnifyFunctionExitNodes>(); @@ -50,7 +51,7 @@ namespace { char PromotePass::ID = 0; INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register", false, false) @@ -61,7 +62,7 @@ bool PromotePass::runOnFunction(Function &F) { bool Changed = false; - DominatorTree &DT = getAnalysis<DominatorTree>(); + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); while (1) { Allocas.clear(); diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp index c3704531..395a46b 100644 --- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -48,11 +48,11 @@ namespace { initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } - bool runOnModule(Module &M) { + bool runOnModule(Module &M) override { static const char *const metaNames[] = { // See http://en.wikipedia.org/wiki/Metasyntactic_variable "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp index ff6e6f9..d9dbbca 100644 --- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -24,16 +24,16 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F, int Priority) { IRBuilder<> IRB(M.getContext()); FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); - StructType *Ty = StructType::get( - IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL); - - Constant *RuntimeCtorInit = ConstantStruct::get( - Ty, IRB.getInt32(Priority), F, 
NULL); // Get the current set of static global constructors and add the new ctor // to the list. SmallVector<Constant *, 16> CurrentCtors; - if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) { + StructType *EltTy; + if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) { + // If there is a global_ctors array, use the existing struct type, which can + // have 2 or 3 fields. + ArrayType *ATy = cast<ArrayType>(GVCtor->getType()->getElementType()); + EltTy = cast<StructType>(ATy->getElementType()); if (Constant *Init = GVCtor->getInitializer()) { unsigned n = Init->getNumOperands(); CurrentCtors.reserve(n + 1); @@ -41,13 +41,26 @@ static void appendToGlobalArray(const char *Array, CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); } GVCtor->eraseFromParent(); + } else { + // Use a simple two-field struct if there isn't one already. + EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), + nullptr); } + // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. + Constant *CSVals[3]; + CSVals[0] = IRB.getInt32(Priority); + CSVals[1] = F; + // FIXME: Drop support for the two element form in LLVM 4.0. + if (EltTy->getNumElements() >= 3) + CSVals[2] = llvm::Constant::getNullValue(IRB.getInt8PtrTy()); + Constant *RuntimeCtorInit = + ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements())); + CurrentCtors.push_back(RuntimeCtorInit); // Create a new initializer. - ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(), - CurrentCtors.size()); + ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); Constant *NewInit = ConstantArray::get(AT, CurrentCtors); // Create the new global variable and replace all uses of diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 8f6eee3..06d73fe 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -34,23 +33,25 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DIBuilder.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" -#include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <queue> using namespace llvm; +#define DEBUG_TYPE "mem2reg" + STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); @@ -59,11 +60,10 @@ STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); bool llvm::isAllocaPromotable(const AllocaInst *AI) { // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. 
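As context for the checks that follow, a hypothetical source-level view of what passing them buys: once an alloca is promotable, mem2reg deletes the stack slot and turns its loads into SSA values (here a phi).

int promoted(bool c) {
  int x;        // the alloca
  if (c) x = 1; // store
  else   x = 2; // store
  return x;     // load; after promotion this is phi [1 from then, 2 from else]
}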
+ unsigned AS = AI->getType()->getAddressSpace(); // Only allow direct and non-volatile loads and stores... - for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) { // Loop over all of the uses of the alloca - const User *U = *UI; + for (const User *U : AI->users()) { if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { // Note that atomic loads can be transformed; atomic semantics do // not have any meaning for a local alloca. @@ -81,12 +81,12 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { II->getIntrinsicID() != Intrinsic::lifetime_end) return false; } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) + if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) return false; if (!onlyUsedByLifetimeMarkers(BCI)) return false; } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) return false; if (!GEPI->hasAllZeroIndices()) return false; @@ -116,11 +116,11 @@ struct AllocaInfo { void clear() { DefiningBlocks.clear(); UsingBlocks.clear(); - OnlyStore = 0; - OnlyBlock = 0; + OnlyStore = nullptr; + OnlyBlock = nullptr; OnlyUsedInOneBlock = true; - AllocaPointerVal = 0; - DbgDeclare = 0; + AllocaPointerVal = nullptr; + DbgDeclare = nullptr; } /// Scan the uses of the specified alloca, filling in the AllocaInfo used @@ -131,8 +131,7 @@ struct AllocaInfo { // As we scan the uses of the alloca instruction, keep track of stores, // and decide whether all of the loads and stores to the alloca are within // the same basic block. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E;) { + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *User = cast<Instruction>(*UI++); if (StoreInst *SI = dyn_cast<StoreInst>(User)) { @@ -149,7 +148,7 @@ struct AllocaInfo { } if (OnlyUsedInOneBlock) { - if (OnlyBlock == 0) + if (!OnlyBlock) OnlyBlock = User->getParent(); else if (OnlyBlock != User->getParent()) OnlyUsedInOneBlock = false; @@ -165,7 +164,7 @@ class RenamePassData { public: typedef std::vector<Value *> ValVector; - RenamePassData() : BB(NULL), Pred(NULL), Values() {} + RenamePassData() : BB(nullptr), Pred(nullptr), Values() {} RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) : BB(B), Pred(P), Values(V) {} BasicBlock *BB; @@ -317,8 +316,7 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is promotable, we know that it's safe to kill all // instructions except for load and store. - for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE;) { + for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { Instruction *I = cast<Instruction>(*UI); ++UI; if (isa<LoadInst>(I) || isa<StoreInst>(I)) @@ -328,10 +326,9 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { // The only users of this bitcast/GEP instruction are lifetime intrinsics. // Follow the use/def chain to erase them now instead of leaving it for // dead code elimination later. 
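The AllocaInfo scan above drives two fast paths that appear next; a hedged source-level illustration (names hypothetical): an alloca written exactly once lets every load dominated by that store take the stored value directly, with no phi placement at all.

int single_store(int v) {
  int x;
  x = v;        // the only store (AllocaInfo::OnlyStore)
  return x + 1; // the load simply folds to v + 1
}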
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE;) { - Instruction *Inst = cast<Instruction>(*UI); - ++UI; + for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { + Instruction *Inst = cast<Instruction>(*UUI); + ++UUI; Inst->eraseFromParent(); } } @@ -359,7 +356,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *UserInst = cast<Instruction>(*UI++); if (!isa<LoadInst>(UserInst)) { assert(UserInst == OnlyStore && "Should only have load/stores"); @@ -456,9 +453,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy; StoresByIndexTy StoresByIndex; - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; - ++UI) - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + for (User *U : AI->users()) + if (StoreInst *SI = dyn_cast<StoreInst>(U)) StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); // Sort the stores by their index, making it efficient to do a lookup with a @@ -467,7 +463,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { LoadInst *LI = dyn_cast<LoadInst>(*UI++); if (!LI) continue; @@ -477,7 +473,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Find the nearest store that has a lower index than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), - std::make_pair(LoadIdx, static_cast<StoreInst *>(0)), + std::make_pair(LoadIdx, + static_cast<StoreInst *>(nullptr)), less_first()); if (I == StoresByIndex.begin()) @@ -485,7 +482,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LI->replaceAllUsesWith(UndefValue::get(LI->getType())); else // Otherwise, there was a store before this load, the load takes its value. - LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0)); + LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0)); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); @@ -495,7 +492,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Remove the (now dead) stores and alloca. while (!AI->use_empty()) { - StoreInst *SI = cast<StoreInst>(AI->use_back()); + StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = Info.DbgDeclare) { DIBuilder DIB(*AI->getParent()->getParent()->getParent()); @@ -638,7 +635,7 @@ void PromoteMem2Reg::run() { // and inserting the phi nodes we marked as necessary // std::vector<RenamePassData> RenamePassWorkList; - RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); + RenamePassWorkList.push_back(RenamePassData(F.begin(), nullptr, Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); @@ -679,8 +676,8 @@ void PromoteMem2Reg::run() { // Iterating over NewPhiNodes is deterministic, so it is safe to try to // simplify and RAUW them as we go. 
If it was not, we could add uses to - // the values we replace with in a non deterministic order, thus creating - // non deterministic def->use chains. + // the values we replace with in a non-deterministic order, thus creating + // non-deterministic def->use chains. for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); @@ -688,7 +685,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); @@ -996,7 +993,7 @@ NextIteration: // Get the next phi node. ++PNI; APN = dyn_cast<PHINode>(PNI); - if (APN == 0) + if (!APN) break; // Verify that it is missing entries. If not, it is not being inserted diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 30adbfa..3fcb789 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -11,17 +11,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ssaupdater" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/AlignOf.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -30,20 +27,22 @@ using namespace llvm; +#define DEBUG_TYPE "ssaupdater" + typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} + : AV(nullptr), ProtoType(nullptr), ProtoName(), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete static_cast<AvailableValsTy*>(AV); } void SSAUpdater::Initialize(Type *Ty, StringRef Name) { - if (AV == 0) + if (!AV) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); @@ -56,7 +55,7 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { } void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { - assert(ProtoType != 0 && "Need to initialize SSAUpdater"); + assert(ProtoType && "Need to initialize SSAUpdater"); assert(ProtoType == V->getType() && "All rewritten values must have the same type"); getAvailableVals(AV)[BB] = V; @@ -92,7 +91,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we have the hard case. Get the live-in values for each // predecessor. SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; - Value *SingularValue = 0; + Value *SingularValue = nullptr; // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. 
If we already have PHI nodes in this block, walk one @@ -107,7 +106,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { if (i == 0) SingularValue = PredVal; else if (PredVal != SingularValue) - SingularValue = 0; + SingularValue = nullptr; } } else { bool isFirstPred = true; @@ -121,7 +120,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { SingularValue = PredVal; isFirstPred = false; } else if (PredVal != SingularValue) - SingularValue = 0; + SingularValue = nullptr; } } @@ -130,7 +129,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return UndefValue::get(ProtoType); // Otherwise, if all the merged values are the same, just use it. - if (SingularValue != 0) + if (SingularValue) return SingularValue; // Otherwise, we do need a PHI: check to see if we already have one available @@ -293,7 +292,7 @@ public: PHINode *PHI = ValueIsPHI(Val, Updater); if (PHI && PHI->getNumIncomingValues() == 0) return PHI; - return 0; + return nullptr; } /// GetPHIValue - For the specified PHI instruction, return the value @@ -403,7 +402,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // the order of these instructions in the block. If the first use in the // block is a load, then it uses the live in value. The last store defines // the live out value. We handle this by doing a linear scan of the block. - Value *StoredValue = 0; + Value *StoredValue = nullptr; for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (LoadInst *L = dyn_cast<LoadInst>(II)) { // If this is a load from an unrelated pointer, ignore it. diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index ff50b12..24bb63b 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -23,6 +22,8 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -34,14 +35,12 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/NoFolder.h" -#include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> @@ -50,6 +49,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "simplifycfg" + static cl::opt<unsigned> PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), cl::desc("Control the amount of phi node folding to perform (default = 1)")); @@ -62,12 +63,13 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); -static cl::opt<bool> -HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), - cl::desc("Hoist conditional 
stores if an unconditional store preceeds")); +static cl::opt<bool> HoistCondStores( + "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores if an unconditional store precedes")); STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); +STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); @@ -90,7 +92,7 @@ namespace { class SimplifyCFGOpt { const TargetTransformInfo &TTI; - const DataLayout *const TD; + const DataLayout *const DL; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); @@ -109,8 +111,8 @@ class SimplifyCFGOpt { bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD) - : TTI(TTI), TD(TD) {} + SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *DL) + : TTI(TTI), DL(DL) {} bool run(BasicBlock *BB); }; } @@ -199,8 +201,8 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, /// ComputeSpeculationCost - Compute an abstract "cost" of speculating the /// given instruction, which is assumed to be safe to speculate. 1 means /// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. -static unsigned ComputeSpeculationCost(const User *I) { - assert(isSafeToSpeculativelyExecute(I) && +static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) { + assert(isSafeToSpeculativelyExecute(I, DL) && "Instruction is not safe to speculatively execute!"); switch (Operator::getOpcode(I)) { default: @@ -211,6 +213,7 @@ static unsigned ComputeSpeculationCost(const User *I) { if (!cast<GEPOperator>(I)->hasAllConstantIndices()) return UINT_MAX; return 1; + case Instruction::ExtractValue: case Instruction::Load: case Instruction::Add: case Instruction::Sub: @@ -224,6 +227,9 @@ static unsigned ComputeSpeculationCost(const User *I) { case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: + case Instruction::BitCast: + case Instruction::ExtractElement: + case Instruction::InsertElement: return 1; // These are all cheap. case Instruction::Call: @@ -251,7 +257,8 @@ static unsigned ComputeSpeculationCost(const User *I) { /// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSet<Instruction*, 4> *AggressiveInsts, - unsigned &CostRemaining) { + unsigned &CostRemaining, + const DataLayout *DL) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -271,12 +278,12 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // branch to BB, then it must be in the 'conditional' part of the "if // statement". If not, it definitely dominates the region. BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()); - if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB) + if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB) return true; // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. 
- if (AggressiveInsts == 0) return false; + if (!AggressiveInsts) return false; // If we have seen this instruction before, don't count it again. if (AggressiveInsts->count(I)) return true; @@ -284,10 +291,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. - if (!isSafeToSpeculativelyExecute(I)) + if (!isSafeToSpeculativelyExecute(I, DL)) return false; - unsigned Cost = ComputeSpeculationCost(I); + unsigned Cost = ComputeSpeculationCost(I, DL); if (Cost > CostRemaining) return false; @@ -297,7 +304,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -306,15 +313,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. -static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { +static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); - if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy()) + if (CI || !DL || !isa<Constant>(V) || !V->getType()->isPointerTy()) return CI; // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType())); + IntegerType *PtrTy = cast<IntegerType>(DL->getIntPtrType(V->getType())); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa<ConstantPointerNull>(V)) @@ -331,7 +338,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { return cast<ConstantInt> (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); } - return 0; + return nullptr; } /// GatherConstantCompares - Given a potentially 'or'd or 'and'd together @@ -340,13 +347,13 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { /// Values vector. static Value * GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, - const DataLayout *TD, bool isEQ, unsigned &UsedICmps) { + const DataLayout *DL, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return 0; + if (!I) return nullptr; // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { - if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { + if (ConstantInt *C = GetConstantInt(I->getOperand(1), DL)) { Value *RHSVal; ConstantInt *RHSC; @@ -389,27 +396,27 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, // If there are a ton of values, we don't want to make a ginormous switch. if (Span.getSetSize().ugt(8) || Span.isEmptySet()) - return 0; + return nullptr; for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; return hasAdd ? 
RHSVal : I->getOperand(0); } - return 0; + return nullptr; } // Otherwise, we can only handle an | or &, depending on isEQ. if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) - return 0; + return nullptr; unsigned NumValsBeforeLHS = Vals.size(); unsigned UsedICmpsBeforeLHS = UsedICmps; - if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, + if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, DL, isEQ, UsedICmps)) { unsigned NumVals = Vals.size(); unsigned UsedICmpsBeforeRHS = UsedICmps; - if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD, + if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL, isEQ, UsedICmps)) { if (LHS == RHS) return LHS; @@ -419,33 +426,33 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, // The RHS of the or/and can't be folded in and we haven't used "Extra" yet, // set it and return success. - if (Extra == 0 || Extra == I->getOperand(1)) { + if (Extra == nullptr || Extra == I->getOperand(1)) { Extra = I->getOperand(1); return LHS; } Vals.resize(NumValsBeforeLHS); UsedICmps = UsedICmpsBeforeLHS; - return 0; + return nullptr; } // If the LHS can't be folded in, but Extra is available and RHS can, try to // use LHS as Extra. - if (Extra == 0 || Extra == I->getOperand(0)) { + if (Extra == nullptr || Extra == I->getOperand(0)) { Value *OldExtra = Extra; Extra = I->getOperand(0); - if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD, + if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL, isEQ, UsedICmps)) return RHS; assert(Vals.size() == NumValsBeforeLHS); Extra = OldExtra; } - return 0; + return nullptr; } static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { - Instruction *Cond = 0; + Instruction *Cond = nullptr; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Cond = dyn_cast<Instruction>(SI->getCondition()); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { @@ -462,7 +469,7 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { /// isValueEqualityComparison - Return true if the specified terminator checks /// to see if a value is equal to constant integer value. Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { - Value *CV = 0; + Value *CV = nullptr; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { // Do not permit merging of large switch instructions into their // predecessors unless there is only one predecessor. @@ -472,14 +479,14 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) - if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD)) + if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL)) CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. 
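A hedged source-level illustration of when that unwrap matters: equality tests of a pointer against integer sentinels can only be merged into a switch after the pointer is converted to an integer (the "magicptr" ptrtoint emitted later in this file).

int kind(void *p) {
  if (p == (void *)0)  return 0; // null participates as integer 0
  if (p == (void *)-1) return 1; // a MAP_FAILED-style sentinel
  return 2;
}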
- if (TD && CV) { + if (DL && CV) { if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) { Value *Ptr = PTII->getPointerOperand(); - if (PTII->getType() == TD->getIntPtrType(Ptr->getType())) + if (PTII->getType() == DL->getIntPtrType(Ptr->getType())) CV = Ptr; } } @@ -504,7 +511,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1), - TD), + DL), Succ)); return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); } @@ -652,11 +659,11 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. - ConstantInt *TIV = 0; + ConstantInt *TIV = nullptr; BasicBlock *TIBB = TI->getParent(); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == TIBB) { - if (TIV != 0) + if (TIV) return false; // Cannot handle multiple values coming to this block. TIV = PredCases[i].Value; } @@ -664,7 +671,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Okay, we found the one constant that our value can be if we get into TI's // BB. Find out which successor will unconditionally be branched to. - BasicBlock *TheRealDest = 0; + BasicBlock *TheRealDest = nullptr; for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) if (ThisCases[i].Value == TIV) { TheRealDest = ThisCases[i].Dest; @@ -672,7 +679,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, } // If not handled by any explicit cases, it is handled by the default case. - if (TheRealDest == 0) TheRealDest = ThisDef; + if (!TheRealDest) TheRealDest = ThisDef; // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; @@ -680,7 +687,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, if (*SI != CheckEdge) (*SI)->removePredecessor(TIBB); else - CheckEdge = 0; + CheckEdge = nullptr; // Insert the new branch. Instruction *NI = Builder.CreateBr(TheRealDest); @@ -732,8 +739,7 @@ static void GetBranchWeights(TerminatorInst *TI, MDNode* MD = TI->getMetadata(LLVMContext::MD_prof); assert(MD); for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { - ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i)); - assert(CI); + ConstantInt *CI = cast<ConstantInt>(MD->getOperand(i)); Weights.push_back(CI->getValue().getZExtValue()); } @@ -748,21 +754,14 @@ static void GetBranchWeights(TerminatorInst *TI, } } -/// Sees if any of the weights are too big for a uint32_t, and halves all the -/// weights if any are. +/// Keep halving the weights until all can fit in uint32_t. static void FitWeights(MutableArrayRef<uint64_t> Weights) { - bool Halve = false; - for (unsigned i = 0; i < Weights.size(); ++i) - if (Weights[i] > UINT_MAX) { - Halve = true; - break; - } - - if (! Halve) - return; - - for (unsigned i = 0; i < Weights.size(); ++i) - Weights[i] /= 2; + uint64_t Max = *std::max_element(Weights.begin(), Weights.end()); + if (Max > UINT_MAX) { + unsigned Offset = 32 - countLeadingZeros(Max); + for (uint64_t &I : Weights) + I >>= Offset; + } } /// FoldValueComparisonIntoPredecessors - The specified terminator is a value @@ -929,8 +928,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. 
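Stepping back to the FitWeights rewrite above: the old code halved all weights at most once, which could still leave values above UINT_MAX, while the new code computes a single right shift that makes the largest weight fit in 32 bits. A standalone sketch of the same arithmetic (plain C++; __builtin_clzll assumed available, Weights assumed non-empty):

#include <algorithm>
#include <climits>
#include <cstdint>
#include <vector>

static void fitWeights(std::vector<uint64_t> &Weights) {
  uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
  if (Max > UINT_MAX) {
    // Max has (64 - clz) significant bits; shifting by that minus 32 leaves
    // exactly 32, so every weight now fits in a uint32_t.
    unsigned Offset = 32 - static_cast<unsigned>(__builtin_clzll(Max));
    for (uint64_t &W : Weights)
      W >>= Offset;
  }
}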
if (CV->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()), + assert(DL && "Cannot switch on pointer without DataLayout"); + CV = Builder.CreatePtrToInt(CV, DL->getIntPtrType(CV->getType()), "magicptr"); } @@ -957,10 +956,10 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Okay, last check. If BB is still a successor of PSI, then we must // have an infinite loop case. If so, add an infinitely looping block // to handle the case to preserve the behavior of the code. - BasicBlock *InfLoopBlock = 0; + BasicBlock *InfLoopBlock = nullptr; for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) if (NewSI->getSuccessor(i) == BB) { - if (InfLoopBlock == 0) { + if (!InfLoopBlock) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) InfLoopBlock = BasicBlock::Create(BB->getContext(), @@ -999,7 +998,7 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and /// BB2, hoist any common code in the two blocks up into the branch block. The /// caller of this function guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI) { +static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As @@ -1073,9 +1072,9 @@ HoistTerminator: if (BB1V == BB2V) continue; - if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) + if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL)) return Changed; - if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) + if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL)) return Changed; } } @@ -1106,7 +1105,7 @@ HoistTerminator: // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (!SI) SI = cast<SelectInst> (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -1151,7 +1150,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // Gather the PHI nodes in BBEnd. std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2; - Instruction *FirstNonPhiInBBEnd = 0; + Instruction *FirstNonPhiInBBEnd = nullptr; for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) { if (PHINode *PN = dyn_cast<PHINode>(I)) { @@ -1229,7 +1228,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // The operands should be either the same or they need to be generated // with a PHI node after sinking. We only handle the case where there is // a single pair of different operands. - Value *DifferentOp1 = 0, *DifferentOp2 = 0; + Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr; unsigned Op1Idx = 0; for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { if (I1->getOperand(I) == I2->getOperand(I)) @@ -1325,11 +1324,11 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB) { StoreInst *StoreToHoist = dyn_cast<StoreInst>(I); if (!StoreToHoist) - return 0; + return nullptr; // Volatile or atomic. 
if (!StoreToHoist->isSimple()) - return 0; + return nullptr; Value *StorePtr = StoreToHoist->getPointerOperand(); @@ -1341,7 +1340,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Could be calling an instruction that effects memory like free(). if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI)) - return 0; + return nullptr; StoreInst *SI = dyn_cast<StoreInst>(CurI); // Found the previous store make sure it stores to the same location. @@ -1349,10 +1348,10 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Found the previous store, return its value operand. return SI->getValueOperand(); else if (SI) - return 0; // Unknown store. + return nullptr; // Unknown store. } - return 0; + return nullptr; } /// \brief Speculate a conditional basic block flattening the CFG. @@ -1392,7 +1391,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, /// \endcode /// /// \returns true if the conditional block is removed. -static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const DataLayout *DL) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa<FCmpInst>(BrCond)) @@ -1418,10 +1418,10 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; unsigned SpeculationCost = 0; - Value *SpeculatedStoreValue = 0; - StoreInst *SpeculatedStore = 0; + Value *SpeculatedStoreValue = nullptr; + StoreInst *SpeculatedStore = nullptr; for (BasicBlock::iterator BBI = ThenBB->begin(), - BBE = llvm::prior(ThenBB->end()); + BBE = std::prev(ThenBB->end()); BBI != BBE; ++BBI) { Instruction *I = BBI; // Skip debug info. @@ -1435,13 +1435,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return false; // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I) && + if (!isSafeToSpeculativelyExecute(I, DL) && !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold) return false; // Store the store speculation candidate. @@ -1492,11 +1492,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { if (!OrigCE && !ThenCE) continue; // Known safe and cheap. - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL) : 0; if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) return false; @@ -1531,7 +1531,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // Hoist the instructions. BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), - llvm::prior(ThenBB->end())); + std::prev(ThenBB->end())); // Insert selects and rewrite the PHI operands. 
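In source terms, the store speculation enabled by HoistCondStores (hypothetical example): a conditional store preceded by an unconditional store to the same location is flattened into a select, so the branch can go away.

void hoist(bool c, int a, int b, int *p) {
  *p = a;   // the unconditional store found by isSafeToSpeculateStore
  if (c)
    *p = b; // becomes: *p = c ? b : a;
}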
IRBuilder<true, NoFolder> Builder(BI); @@ -1589,10 +1589,9 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { // We can only support instructions that do not define values that are // live outside of the current basic block. - for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); - UI != E; ++UI) { - Instruction *U = cast<Instruction>(*UI); - if (U->getParent() != BB || isa<PHINode>(U)) return false; + for (User *U : BBI->users()) { + Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != BB || isa<PHINode>(UI)) return false; } // Looks ok, continue checking. @@ -1605,7 +1604,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// that is defined in the same block as the branch and if any PHI entries are /// constants, thread edges corresponding to that entry to be branches to their /// ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -1628,7 +1627,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); - if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; + if (!CB || !CB->getType()->isIntegerTy(1)) continue; // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. @@ -1674,7 +1673,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { } // Check for trivial simplification. - if (Value *V = SimplifyInstruction(N, TD)) { + if (Value *V = SimplifyInstruction(N, DL)) { TranslateMap[BBI] = V; delete N; // Instruction folded away, don't need actual inst } else { @@ -1695,7 +1694,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { } // Recurse, simplifying any other constants. - return FoldCondBranchOnPHI(BI, TD) | true; + return FoldCondBranchOnPHI(BI, DL) | true; } return false; @@ -1703,7 +1702,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { +static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -1737,23 +1736,23 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); - if (Value *V = SimplifyInstruction(PN, TD)) { + if (Value *V = SimplifyInstruction(PN, DL)) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); continue; } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, - MaxCostVal0) || + MaxCostVal0, DL) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, - MaxCostVal1)) + MaxCostVal1, DL)) return false; } // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. 
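In source terms, the if-conversion FoldTwoEntryPHINode performs (hypothetical example; both arms must satisfy DominatesMergePoint's safety and cost budget):

int fold(bool c, int x, int y) {
  int r;
  if (c) r = x | 3; // cheap and safe to speculate
  else   r = y + 2; // likewise
  return r;         // the phi becomes select(c, x | 3, y + 2)
}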
PN = dyn_cast<PHINode>(BB->begin()); - if (PN == 0) return true; + if (!PN) return true; // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. @@ -1767,11 +1766,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not // worth promoting to select instructions. - BasicBlock *DomBlock = 0; + BasicBlock *DomBlock = nullptr; BasicBlock *IfBlock1 = PN->getIncomingBlock(0); BasicBlock *IfBlock2 = PN->getIncomingBlock(1); if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) { - IfBlock1 = 0; + IfBlock1 = nullptr; } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I) @@ -1784,7 +1783,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { } if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { - IfBlock2 = 0; + IfBlock2 = nullptr; } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I) @@ -1964,10 +1963,10 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) { BasicBlock *BB = BI->getParent(); - Instruction *Cond = 0; + Instruction *Cond = nullptr; if (BI->isConditional()) Cond = dyn_cast<Instruction>(BI->getCondition()); else { @@ -1993,12 +1992,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { } } - if (Cond == 0) + if (!Cond) return false; } - if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || - Cond->getParent() != BB || !Cond->hasOneUse()) + if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || + Cond->getParent() != BB || !Cond->hasOneUse()) return false; // Only allow this if the condition is a simple instruction that can be @@ -2013,10 +2012,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // that feeds the branch. We later ensure that any values that _it_ uses // were also live in the predecessor, so that we don't unnecessarily create // register pressure or inhibit out-of-order execution. - Instruction *BonusInst = 0; + Instruction *BonusInst = nullptr; if (&*FrontIt != Cond && - FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond && - isSafeToSpeculativelyExecute(FrontIt)) { + FrontIt->hasOneUse() && FrontIt->user_back() == Cond && + isSafeToSpeculativelyExecute(FrontIt, DL)) { BonusInst = &*FrontIt; ++FrontIt; @@ -2031,7 +2030,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; - // Ingore dbg intrinsics. + // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; if (&*CondIt != BI) @@ -2048,7 +2047,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); - BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; + BasicBlock *FalseDest = (BI->isConditional()) ? 
BI->getSuccessor(1) : nullptr; if (TrueDest == BB || FalseDest == BB) return false; @@ -2060,7 +2059,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // the common successor, verify that the same value flows in from both // blocks. SmallVector<PHINode*, 4> PHIs; - if (PBI == 0 || PBI->isUnconditional() || + if (!PBI || PBI->isUnconditional() || (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && @@ -2094,7 +2093,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // instructions that are used by the terminator's condition because it // exposes more merging opportunities. bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() && - *BonusInst->use_begin() == Cond); + BonusInst->user_back() == Cond); if (BonusInst && !UsedByBranch) { // Collect the values used by the bonus inst @@ -2150,9 +2149,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { } // If we have a bonus inst, clone it into the predecessor block. - Instruction *NewBonus = 0; + Instruction *NewBonus = nullptr; if (BonusInst) { NewBonus = BonusInst->clone(); + + // If we moved a load, we cannot any longer claim any knowledge about + // its potential value. The previous information might have been valid + // only given the branch precondition. + // For an analogous reason, we must also drop all the metadata whose + // semantics we don't understand. + NewBonus->dropUnknownMetadata(LLVMContext::MD_dbg); + PredBlock->getInstList().insert(PBI, NewBonus); NewBonus->takeName(BonusInst); BonusInst->setName(BonusInst->getName()+".old"); @@ -2218,14 +2225,14 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { MDBuilder(BI->getContext()). createBranchWeights(MDWeights)); } else - PBI->setMetadata(LLVMContext::MD_prof, NULL); + PBI->setMetadata(LLVMContext::MD_prof, nullptr); } else { // Update PHI nodes in the common successors. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { ConstantInt *PBI_C = cast<ConstantInt>( PHIs[i]->getIncomingValueForBlock(PBI->getParent())); assert(PBI_C->getType()->isIntegerTy(1)); - Instruction *MergedCond = 0; + Instruction *MergedCond = nullptr; if (PBI->getSuccessor(0) == TrueDest) { // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) @@ -2338,7 +2345,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { } // If this is a conditional branch in an empty block, and if any - // predecessors is a conditional branch to one of our destinations, + // predecessors are a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. BasicBlock::iterator BBI = BB->begin(); // Ignore dbg intrinsics. @@ -2373,16 +2380,33 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. - BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + // Also do not perform this transformation if any phi node in the common + // destination block can trap when reached by BB or PBB (PR17073). In that + // case, it would be unsafe to hoist the operation into a select instruction. + + BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); - isa<PHINode>(II); ++II, ++NumPhis) + isa<PHINode>(II); ++II, ++NumPhis) { if (NumPhis > 2) // Disable this xform. 
return false; + PHINode *PN = cast<PHINode>(II); + Value *BIV = PN->getIncomingValueForBlock(BB); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV)) + if (CE->canTrap()) + return false; + + unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); + Value *PBIV = PN->getIncomingValue(PBBIdx); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV)) + if (CE->canTrap()) + return false; + } + // Finally, if everything is ok, fold the branches to logical ops. - BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); + BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); @@ -2498,16 +2522,16 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // If TrueBB and FalseBB are equal, only try to preserve one copy of that // successor. BasicBlock *KeepEdge1 = TrueBB; - BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0; + BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; // Then remove the rest. for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { BasicBlock *Succ = OldTerm->getSuccessor(I); // Make sure only to keep exactly one copy of each edge. if (Succ == KeepEdge1) - KeepEdge1 = 0; + KeepEdge1 = nullptr; else if (Succ == KeepEdge2) - KeepEdge2 = 0; + KeepEdge2 = nullptr; else Succ->removePredecessor(OldTerm->getParent()); } @@ -2516,7 +2540,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); // Insert an appropriate new terminator. - if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) { + if (!KeepEdge1 && !KeepEdge2) { if (TrueBB == FalseBB) // We were only looking for one successor, and it was present. // Create an unconditional branch to it. @@ -2538,7 +2562,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // One of the selected values was a successor, but the other wasn't. // Insert an unconditional branch to the one that was found; // the edge to the one that wasn't must be unreachable. - if (KeepEdge1 == 0) + if (!KeepEdge1) // Only TrueBB was found. Builder.CreateBr(TrueBB); else @@ -2625,7 +2649,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, - const DataLayout *TD) { + const DataLayout *DL) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -2639,7 +2663,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); - if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; + if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) return false; SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); if (SI->getCondition() != V) @@ -2653,12 +2677,12 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - if (Value *V = SimplifyInstruction(ICI, TD)) { + if (Value *V = SimplifyInstruction(ICI, DL)) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. 
- return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -2674,14 +2698,14 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); - PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back()); - if (PHIUse == 0 || PHIUse != &SuccBlock->front() || + PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back()); + if (PHIUse == nullptr || PHIUse != &SuccBlock->front() || isa<PHINode>(++BasicBlock::iterator(PHIUse))) return false; @@ -2730,32 +2754,32 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( /// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, +static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); - if (Cond == 0) return false; + if (!Cond) return false; // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. - Value *CompVal = 0; + Value *CompVal = nullptr; std::vector<ConstantInt*> Values; bool TrueWhenEqual = true; - Value *ExtraCase = 0; + Value *ExtraCase = nullptr; unsigned UsedICmps = 0; if (Cond->getOpcode() == Instruction::Or) { - CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, + CompVal = GatherConstantCompares(Cond, Values, ExtraCase, DL, true, UsedICmps); } else if (Cond->getOpcode() == Instruction::And) { - CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false, + CompVal = GatherConstantCompares(Cond, Values, ExtraCase, DL, false, UsedICmps); TrueWhenEqual = false; } // If we didn't have a multiply compared value, fail. - if (CompVal == 0) return false; + if (!CompVal) return false; // Avoid turning single icmps into a switch. if (UsedICmps <= 1) @@ -2811,9 +2835,9 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, Builder.SetInsertPoint(BI); // Convert pointer to int before we switch. if (CompVal->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without DataLayout"); + assert(DL && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, - TD->getIntPtrType(CompVal->getType()), + DL->getIntPtrType(CompVal->getType()), "magicptr"); } @@ -3050,7 +3074,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // Find the most popular block. 
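// ---- [illustrative aside, not part of the patch] ----
// What SimplifyBranchOnICmpChain above recognizes, shown at source level
// (a sketch; the pass itself rewrites IR, and these functions are only an
// analogy). An or-chain of equality compares against constants becomes a
// single switch on the shared value:
static bool beforeChainFold(int X) { return X == 0 || X == 1 || X == 7; }
static bool afterChainFold(int X) {
  switch (X) {            // one switch replaces the icmp/or chain
  case 0: case 1: case 7: return true;
  default:                return false;
  }
}
// ---- [end aside] ----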
unsigned MaxPop = 0; unsigned MaxIndex = 0; - BasicBlock *MaxBlock = 0; + BasicBlock *MaxBlock = nullptr; for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { if (I->second.first > MaxPop || @@ -3188,7 +3212,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - ComputeMaskedBits(Cond, KnownZero, KnownOne); + computeKnownBits(Cond, KnownZero, KnownOne); // Gather dead cases. SmallVector<ConstantInt*, 8> DeadCases; @@ -3222,7 +3246,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { Case.getCaseSuccessor()->removePredecessor(SI->getParent()); SI->removeCase(Case); } - if (HasWeight) { + if (HasWeight && Weights.size() >= 2) { SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); SI->setMetadata(LLVMContext::MD_prof, MDBuilder(SI->getParent()->getContext()). @@ -3241,13 +3265,13 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex) { if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) - return NULL; // BB must be empty to be a candidate for simplification. + return nullptr; // BB must be empty to be a candidate for simplification. if (!BB->getSinglePredecessor()) - return NULL; // BB must be dominated by the switch. + return nullptr; // BB must be dominated by the switch. BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator()); if (!Branch || !Branch->isUnconditional()) - return NULL; // Terminator must be unconditional branch. + return nullptr; // Terminator must be unconditional branch. BasicBlock *Succ = Branch->getSuccessor(0); @@ -3263,7 +3287,7 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, return PHI; } - return NULL; + return nullptr; } /// ForwardSwitchConditionToPHI - Try to forward the condition of a switch @@ -3306,6 +3330,11 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { /// ValidLookupTableConstant - Return true if the backend will be able to handle /// initializing an array of constants like C. 
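// ---- [illustrative aside, not part of the patch] ----
// The dead-case test used by EliminateDeadSwitchCases above, as a sketch;
// the standalone helper is hypothetical. computeKnownBits (the new name of
// ComputeMaskedBits in this change) reports bits of the condition that are
// provably 0 or 1; a case constant contradicting either mask can never be
// selected, so its edge may be removed.
static bool caseIsDead(const APInt &CaseVal, const APInt &KnownZero,
                       const APInt &KnownOne) {
  return (CaseVal & KnownZero) != 0 ||     // sets a bit known to be zero
         (CaseVal & KnownOne) != KnownOne; // clears a bit known to be one
}
// ---- [end aside] ----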
static bool ValidLookupTableConstant(Constant *C) { + if (C->isThreadDependent()) + return false; + if (C->isDLLImportDependent()) + return false; + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) return CE->isGEPWithNoNotionalOverIndexing(); @@ -3336,12 +3365,12 @@ ConstantFold(Instruction *I, if (SelectInst *Select = dyn_cast<SelectInst>(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) - return 0; + return nullptr; if (A->isAllOnesValue()) return LookupConstant(Select->getTrueValue(), ConstantPool); if (A->isNullValue()) return LookupConstant(Select->getFalseValue(), ConstantPool); - return 0; + return nullptr; } SmallVector<Constant *, 4> COps; @@ -3349,7 +3378,7 @@ ConstantFold(Instruction *I, if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) COps.push_back(A); else - return 0; + return nullptr; } if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) @@ -3428,7 +3457,7 @@ GetCaseResults(SwitchInst *SI, Res.push_back(std::make_pair(PHI, ConstVal)); } - return true; + return Res.size() > 0; } namespace { @@ -3444,7 +3473,7 @@ namespace { ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, Constant *DefaultValue, - const DataLayout *TD); + const DataLayout *DL); /// BuildLookup - Build instructions with Builder to retrieve the value at /// the position given by Index in the lookup table. @@ -3452,7 +3481,7 @@ namespace { /// WouldFitInRegister - Return true if a table with TableSize elements of /// type ElementType would fit in a target-legal register. - static bool WouldFitInRegister(const DataLayout *TD, + static bool WouldFitInRegister(const DataLayout *DL, uint64_t TableSize, const Type *ElementType); @@ -3491,38 +3520,44 @@ SwitchLookupTable::SwitchLookupTable(Module &M, ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, Constant *DefaultValue, - const DataLayout *TD) - : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) { + const DataLayout *DL) + : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), + Array(nullptr) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); // If all values in the table are equal, this is that value. SingleValue = Values.begin()->second; + Type *ValueType = Values.begin()->second->getType(); + // Build up the table contents. SmallVector<Constant*, 64> TableContents(TableSize); for (size_t I = 0, E = Values.size(); I != E; ++I) { ConstantInt *CaseVal = Values[I].first; Constant *CaseRes = Values[I].second; - assert(CaseRes->getType() == DefaultValue->getType()); + assert(CaseRes->getType() == ValueType); uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) .getLimitedValue(); TableContents[Idx] = CaseRes; if (CaseRes != SingleValue) - SingleValue = 0; + SingleValue = nullptr; } // Fill in any holes in the table with the default result. if (Values.size() < TableSize) { + assert(DefaultValue && + "Need a default value to fill the lookup table holes."); + assert(DefaultValue->getType() == ValueType); for (uint64_t I = 0; I < TableSize; ++I) { if (!TableContents[I]) TableContents[I] = DefaultValue; } if (DefaultValue != SingleValue) - SingleValue = 0; + SingleValue = nullptr; } // If each element in the table contains the same value, we only need to store @@ -3533,8 +3568,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M, } // If the type is integer and the table fits in a register, build a bitmap. 
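// ---- [illustrative aside, not part of the patch] ----
// How the bitmap ("BitMapKind") representation mentioned above works, in
// plain C++ (sketch; names are hypothetical). A small integer-typed table
// is packed into one register-sized constant with element 0 in the low
// bits; entry I is recovered by a shift plus truncation, which the real
// code emits as lshr + trunc. For an i8 table {0x11, 0x22, 0x33} the
// packed constant is 0x332211 and entry 1 reads back 0x22.
static uint8_t lookupPackedTable(uint64_t BitMap, unsigned Index) {
  return static_cast<uint8_t>(BitMap >> (Index * 8)); // 8 = element bits
}
// ---- [end aside] ----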
- if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { - IntegerType *IT = cast<IntegerType>(DefaultValue->getType()); + if (WouldFitInRegister(DL, TableSize, ValueType)) { + IntegerType *IT = cast<IntegerType>(ValueType); APInt TableInt(TableSize * IT->getBitWidth(), 0); for (uint64_t I = TableSize; I > 0; --I) { TableInt <<= IT->getBitWidth(); @@ -3552,7 +3587,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M, } // Store the table in an array. - ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); + ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, @@ -3589,6 +3624,16 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { "switch.masked"); } case ArrayKind: { + // Make sure the table index will not overflow when treated as signed. + IntegerType *IT = cast<IntegerType>(Index->getType()); + uint64_t TableSize = Array->getInitializer()->getType() + ->getArrayNumElements(); + if (TableSize > (1ULL << (IT->getBitWidth() - 1))) + Index = Builder.CreateZExt(Index, + IntegerType::get(IT->getContext(), + IT->getBitWidth() + 1), + "switch.tableidx.zext"); + Value *GEPIndices[] = { Builder.getInt32(0), Index }; Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, "switch.gep"); @@ -3598,10 +3643,10 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { llvm_unreachable("Unknown lookup table kind!"); } -bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, +bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL, uint64_t TableSize, const Type *ElementType) { - if (!TD) + if (!DL) return false; const IntegerType *IT = dyn_cast<IntegerType>(ElementType); if (!IT) @@ -3612,7 +3657,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. if (TableSize >= UINT_MAX/IT->getBitWidth()) return false; - return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); + return DL->fitsInLegalInteger(TableSize * IT->getBitWidth()); } /// ShouldBuildLookupTable - Determine whether a lookup table should be built @@ -3621,7 +3666,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, - const DataLayout *TD, + const DataLayout *DL, const SmallDenseMap<PHINode*, Type*>& ResultTypes) { if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) return false; // TableSize overflowed, or mul below might overflow. @@ -3637,7 +3682,7 @@ static bool ShouldBuildLookupTable(SwitchInst *SI, // Saturate this flag to false. AllTablesFitInRegister = AllTablesFitInRegister && - SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty); + SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty); // If both flags saturate, we're done. NOTE: This *only* works with // saturating flags, and all flags have to saturate first due to the @@ -3666,7 +3711,7 @@ static bool ShouldBuildLookupTable(SwitchInst *SI, static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, - const DataLayout* TD) { + const DataLayout* DL) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // Only build lookup table when we have a target that supports it. @@ -3680,11 +3725,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, // GEP needs a runtime relocation in PIC code. 
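// ---- [illustrative aside, not part of the patch] ----
// The overall SwitchToLookupTable rewrite, pictured at source level
// (sketch only; the pass transforms IR, and "Table" stands in for the
// switch.table global it creates):
static int beforeTable(unsigned I) {
  switch (I) {
  case 0: return 13; case 1: return 42; case 2: return 7;
  default: return 0;
  }
}
static int afterTable(unsigned I) {
  static const int Table[3] = {13, 42, 7}; // the generated constant array
  return I < 3 ? Table[I] : 0;             // bounds check, then one load
}
// ---- [end aside] ----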
We should just build one big // string and lookup indices into that. - // Ignore the switch if the number of cases is too small. - // This is similar to the check when building jump tables in - // SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Determine the best cut-off. - if (SI->getNumCases() < 4) + // Ignore switches with less than three cases. Lookup tables will not make them + // faster, so we don't analyze them. + if (SI->getNumCases() < 3) return false; // Figure out the corresponding result for each case value and phi node in the @@ -3694,7 +3737,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, ConstantInt *MinCaseVal = CI.getCaseValue(); ConstantInt *MaxCaseVal = CI.getCaseValue(); - BasicBlock *CommonDest = 0; + BasicBlock *CommonDest = nullptr; typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; SmallDenseMap<PHINode*, ResultListTy> ResultLists; SmallDenseMap<PHINode*, Constant*> DefaultResults; @@ -3712,7 +3755,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, - Results, TD)) + Results, DL)) return false; // Append the result from this case to the list for each phi. @@ -3723,21 +3766,41 @@ static bool SwitchToLookupTable(SwitchInst *SI, } } - // Get the resulting values for the default case. + // Keep track of the result types. + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + ResultTypes[PHI] = ResultLists[PHI][0].second->getType(); + } + + uint64_t NumResults = ResultLists[PHIs[0]].size(); + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + bool TableHasHoles = (NumResults < TableSize); + + // If the table has holes, we need a constant result for the default case + // or a bitmask that fits in a register. SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; - if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, - DefaultResultsList, TD)) - return false; + bool HasDefaultResults = false; + if (TableHasHoles) { + HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), + &CommonDest, DefaultResultsList, DL); + } + bool NeedMask = (TableHasHoles && !HasDefaultResults); + if (NeedMask) { + // As an extra penalty for the validity test we require more cases. + if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark). + return false; + if (!(DL && DL->fitsInLegalInteger(TableSize))) + return false; + } + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { PHINode *PHI = DefaultResultsList[I].first; Constant *Result = DefaultResultsList[I].second; DefaultResults[PHI] = Result; - ResultTypes[PHI] = Result->getType(); } - APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes)) + if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes)) return false; // Create the BB that does the lookups. @@ -3755,7 +3818,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Compute the maximum table size representable by the integer type we are // switching upon. unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); - uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize; + uint64_t MaxTableSize = CaseSize > 63 ? 
UINT64_MAX : 1ULL << CaseSize; assert(MaxTableSize >= TableSize && "It is impossible for a switch to have more entries than the max " "representable value of its input integer type's size."); @@ -3770,25 +3833,67 @@ static bool SwitchToLookupTable(SwitchInst *SI, SI->getDefaultDest()->removePredecessor(SI->getParent()); } else { Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( - MinCaseVal->getType(), TableSize)); + MinCaseVal->getType(), TableSize)); Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); } // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); + + if (NeedMask) { + // Before doing the lookup we do the hole check. + // The LookupBB is therefore re-purposed to do the hole check + // and we create a new LookupBB. + BasicBlock *MaskBB = LookupBB; + MaskBB->setName("switch.hole_check"); + LookupBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup", + CommonDest->getParent(), + CommonDest); + + // Build bitmask; fill in a 1 bit for every case. + APInt MaskInt(TableSize, 0); + APInt One(TableSize, 1); + const ResultListTy &ResultList = ResultLists[PHIs[0]]; + for (size_t I = 0, E = ResultList.size(); I != E; ++I) { + uint64_t Idx = (ResultList[I].first->getValue() - + MinCaseVal->getValue()).getLimitedValue(); + MaskInt |= One << Idx; + } + ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt); + + // Get the TableIndex'th bit of the bitmask. + // If this bit is 0 (meaning hole) jump to the default destination, + // else continue with table lookup. + IntegerType *MapTy = TableMask->getType(); + Value *MaskIndex = Builder.CreateZExtOrTrunc(TableIndex, MapTy, + "switch.maskindex"); + Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, + "switch.shifted"); + Value *LoBit = Builder.CreateTrunc(Shifted, + Type::getInt1Ty(Mod.getContext()), + "switch.lobit"); + Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); + + Builder.SetInsertPoint(LookupBB); + AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent()); + } + bool ReturnedEarly = false; for (size_t I = 0, E = PHIs.size(); I != E; ++I) { PHINode *PHI = PHIs[I]; + // If using a bitmask, use any value to fill the lookup table holes. + Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], - DefaultResults[PHI], TD); + DV, DL); Value *Result = Table.BuildLookup(TableIndex, Builder); // If the result is used to return immediately from the function, we want to // do that right here. - if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) && - *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) { + if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) && + PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) { Builder.CreateRet(Result); ReturnedEarly = true; break; @@ -3811,6 +3916,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, SI->eraseFromParent(); ++NumLookupTables; + if (NeedMask) + ++NumLookupTablesHoles; return true; } @@ -3822,12 +3929,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // see if that predecessor totally determines the outcome of this switch. 
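// ---- [illustrative aside, not part of the patch] ----
// The hole check built above, reduced to scalar code (sketch; the helper
// is hypothetical). The mask carries a 1 bit per populated table slot, so
// testing bit TableIndex sends holes to the switch's original default
// destination instead of loading a meaningless filler value:
static bool tableSlotIsLive(uint64_t TableMask, unsigned TableIndex) {
  return (TableMask >> TableIndex) & 1; // 0 means hole: branch to default
}
// ---- [end aside] ----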
if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; // If the block only contains the switch, see if we can fold the block // away into any preds. @@ -3837,22 +3944,22 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; // Remove unreachable cases. if (EliminateDeadSwitchCases(SI)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; - if (SwitchToLookupTable(SI, Builder, TTI, TD)) - return SimplifyCFG(BB, TTI, TD) | true; + if (SwitchToLookupTable(SI, Builder, TTI, DL)) + return SimplifyCFG(BB, TTI, DL) | true; return false; } @@ -3889,7 +3996,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } return Changed; } @@ -3913,7 +4020,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; if (I->isTerminator() && - TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD)) + TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, DL)) return true; } @@ -3921,8 +4028,8 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI)) - return SimplifyCFG(BB, TTI, TD) | true; + if (FoldBranchToCommonDest(BI, DL)) + return SimplifyCFG(BB, TTI, DL) | true; return false; } @@ -3937,7 +4044,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. @@ -3947,67 +4054,67 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } else if (&*I == cast<Instruction>(BI->getCondition())){ ++I; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } } // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. 
- if (SimplifyBranchOnICmpChain(BI, TD, Builder)) + if (SimplifyBranchOnICmpChain(BI, DL, Builder)) return true; // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI)) - return SimplifyCFG(BB, TTI, TD) | true; + if (FoldBranchToCommonDest(BI, DL)) + return SimplifyCFG(BB, TTI, DL) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we // can hoist it up to the branching block. - if (BI->getSuccessor(0)->getSinglePredecessor() != 0) { - if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { - if (HoistThenElseCodeToIf(BI)) - return SimplifyCFG(BB, TTI, TD) | true; + if (BI->getSuccessor(0)->getSinglePredecessor()) { + if (BI->getSuccessor(1)->getSinglePredecessor()) { + if (HoistThenElseCodeToIf(BI, DL)) + return SimplifyCFG(BB, TTI, DL) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally - // execute Successor #0 if it branches to successor #1. + // execute Successor #0 if it branches to Successor #1. TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) - return SimplifyCFG(BB, TTI, TD) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL)) + return SimplifyCFG(BB, TTI, DL) | true; } - } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally - // execute Successor #1 if it branches to successor #0. + // execute Successor #1 if it branches to Successor #0. TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) - return SimplifyCFG(BB, TTI, TD) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL)) + return SimplifyCFG(BB, TTI, DL) | true; } // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, TD)) - return SimplifyCFG(BB, TTI, TD) | true; + if (FoldCondBranchOnPHI(BI, DL)) + return SimplifyCFG(BB, TTI, DL) | true; // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; return false; } @@ -4023,7 +4130,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { if (C->isNullValue()) { // Only look at the first use, avoid hurting compile time with long uselists - User *Use = *I->use_begin(); + User *Use = *I->user_begin(); // Now make sure that there are no instructions in between that can alter // control flow (eg. 
calls) @@ -4119,7 +4226,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // eliminate it, do so now. if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, TD); + Changed |= FoldTwoEntryPHINode(PN, DL); Builder.SetInsertPoint(BB->getTerminator()); if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { @@ -4151,6 +4258,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// of the CFG. It returns true if a modification was made. /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - const DataLayout *TD) { - return SimplifyCFGOpt(TTI, TD).run(BB); + const DataLayout *DL) { + return SimplifyCFGOpt(TTI, DL).run(BB); } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index bf3442a..b284e6f 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -13,9 +13,8 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "indvars" - #include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/IVUsers.h" @@ -23,13 +22,18 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "indvars" + STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); @@ -44,7 +48,7 @@ namespace { Loop *L; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *TD; // May be NULL + const DataLayout *DL; // May be NULL SmallVectorImpl<WeakVH> &DeadInsts; @@ -52,13 +56,14 @@ namespace { public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) : + SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) : L(Loop), LI(LPM->getAnalysisIfAvailable<LoopInfo>()), SE(SE), - TD(LPM->getAnalysisIfAvailable<DataLayout>()), DeadInsts(Dead), Changed(false) { + DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; assert(LI && "IV simplification requires LoopInfo"); } @@ -67,7 +72,7 @@ namespace { /// Iteratively perform simplification on a worklist of users of the /// specified induction variable. This is the top-level driver that applies /// all simplicitions to users of an IV. - void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL); + void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); @@ -75,6 +80,9 @@ namespace { void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, bool IsSigned); + + Instruction *splitOverflowIntrinsic(Instruction *IVUser, + const DominatorTree *DT); }; } @@ -87,25 +95,25 @@ namespace { /// be folded (in case more folding opportunities have been exposed). /// Otherwise return null. 
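// ---- [illustrative aside, not part of the patch] ----
// The kind of rewrite foldIVUser (below) performs, at source level
// (sketch). If SCEV can prove the IV is always a multiple of 4, then
// (I + 1) >> 2 equals I >> 2: the low two bits of I are zero, so the +1
// never carries into the shifted-out range and the add becomes dead.
static unsigned beforeIVFold(unsigned N) { unsigned I = 4 * N; return (I + 1) >> 2; }
static unsigned afterIVFold(unsigned N)  { unsigned I = 4 * N; return I >> 2; }
// ---- [end aside] ----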
Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { - Value *IVSrc = 0; + Value *IVSrc = nullptr; unsigned OperIdx = 0; - const SCEV *FoldedExpr = 0; + const SCEV *FoldedExpr = nullptr; switch (UseInst->getOpcode()) { default: - return 0; + return nullptr; case Instruction::UDiv: case Instruction::LShr: // We're only interested in the case where we know something about // the numerator and have a constant denominator. if (IVOperand != UseInst->getOperand(OperIdx) || !isa<ConstantInt>(UseInst->getOperand(1))) - return 0; + return nullptr; // Attempt to fold a binary operator with constant operand. // e.g. ((I + 1) >> 2) => I >> 2 if (!isa<BinaryOperator>(IVOperand) || !isa<ConstantInt>(IVOperand->getOperand(1))) - return 0; + return nullptr; IVSrc = IVOperand->getOperand(0); // IVSrc must be the (SCEVable) IV, since the other operand is const. @@ -116,7 +124,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) // Get a constant for the divisor. See createSCEV. uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth(); if (D->getValue().uge(BitWidth)) - return 0; + return nullptr; D = ConstantInt::get(UseInst->getContext(), APInt::getOneBitSet(BitWidth, D->getZExtValue())); @@ -125,11 +133,11 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) } // We have something that might fold it's operand. Compare SCEVs. if (!SE->isSCEVable(UseInst->getType())) - return 0; + return nullptr; // Bypass the operand if SCEV can prove it has no effect. if (SE->getSCEV(UseInst) != FoldedExpr) - return 0; + return nullptr; DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand << " -> " << *UseInst << '\n'); @@ -263,6 +271,69 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, return true; } +/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow +/// analysis and optimization. +/// +/// \return A new value representing the non-overflowing add if possible, +/// otherwise return the original value. +Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser, + const DominatorTree *DT) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser); + if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow) + return IVUser; + + // Find a branch guarded by the overflow check. + BranchInst *Branch = nullptr; + Instruction *AddVal = nullptr; + for (User *U : II->users()) { + if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(U)) { + if (ExtractInst->getNumIndices() != 1) + continue; + if (ExtractInst->getIndices()[0] == 0) + AddVal = ExtractInst; + else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse()) + Branch = dyn_cast<BranchInst>(ExtractInst->user_back()); + } + } + if (!AddVal || !Branch) + return IVUser; + + BasicBlock *ContinueBB = Branch->getSuccessor(1); + if (std::next(pred_begin(ContinueBB)) != pred_end(ContinueBB)) + return IVUser; + + // Check if all users of the add are provably NSW. + bool AllNSW = true; + for (Use &U : AddVal->uses()) { + if (Instruction *UseInst = dyn_cast<Instruction>(U.getUser())) { + BasicBlock *UseBB = UseInst->getParent(); + if (PHINode *PHI = dyn_cast<PHINode>(UseInst)) + UseBB = PHI->getIncomingBlock(U); + if (!DT->dominates(ContinueBB, UseBB)) { + AllNSW = false; + break; + } + } + } + if (!AllNSW) + return IVUser; + + // Go for it... 
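// ---- [illustrative aside, not part of the patch] ----
// The pattern splitOverflowIntrinsic matches, in C terms. This is a sketch
// using the Clang/GCC __builtin_sadd_overflow builtin as a stand-in for
// llvm.sadd.with.overflow; the function is hypothetical. Every use of Sum
// past the check is dominated by the no-overflow path, so the pass may
// recompute it as a plain add carrying the nsw flag, which SCEV and the
// indvar logic can then reason about.
#include <cstdlib>
static int guardedAdd(int A, int B) {
  int Sum;
  if (__builtin_sadd_overflow(A, B, &Sum)) // extractvalue of the i1 flag
    abort();                               // overflow path never continues
  return Sum;                              // provably no signed wrap here
}
// ---- [end aside] ----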
+ IRBuilder<> Builder(IVUser); + Instruction *AddInst = dyn_cast<Instruction>( + Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1))); + + // The caller expects the new add to have the same form as the intrinsic. The + // IV operand position must be the same. + assert((AddInst->getOpcode() == Instruction::Add && + AddInst->getOperand(0) == II->getOperand(0)) && + "Bad add instruction created from overflow intrinsic."); + + AddVal->replaceAllUsesWith(AddInst); + DeadInsts.push_back(AddVal); + return AddInst; +} + /// pushIVUsers - Add all uses of Def to the current IV's worklist. /// static void pushIVUsers( @@ -270,16 +341,15 @@ static void pushIVUsers( SmallPtrSet<Instruction*,16> &Simplified, SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { - for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); + for (User *U : Def->users()) { + Instruction *UI = cast<Instruction>(U); // Avoid infinite or exponential worklist processing. // Also ensure unique worklist users. // If Def is a LoopPhi, it may not be in the Simplified set, so check for // self edges first. - if (User != Def && Simplified.insert(User)) - SimpleIVUsers.push_back(std::make_pair(User, Def)); + if (UI != Def && Simplified.insert(UI)) + SimpleIVUsers.push_back(std::make_pair(UI, Def)); } } @@ -334,8 +404,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { while (!SimpleIVUsers.empty()) { std::pair<Instruction*, Instruction*> UseOper = SimpleIVUsers.pop_back_val(); + Instruction *UseInst = UseOper.first; + // Bypass back edges to avoid extra work. - if (UseOper.first == CurrIV) continue; + if (UseInst == CurrIV) continue; + + if (V && V->shouldSplitOverflowInstrinsics()) { + UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree()); + if (!UseInst) + continue; + } Instruction *IVOperand = UseOper.second; for (unsigned N = 0; IVOperand; ++N) { diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp index f9687e4d..33b3637 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -14,14 +14,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" @@ -29,6 +28,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "instsimplify" + STATISTIC(NumSimplified, "Number of redundant instructions removed"); namespace { @@ -38,23 +39,27 @@ namespace { initializeInstSimplifierPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<TargetLibraryInfo>(); } /// runOnFunction - Remove instructions that simplify. 
- bool runOnFunction(Function &F) { - const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); - const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); + bool runOnFunction(Function &F) override { + const DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; do { - for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), - DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) - for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) { + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) + // Here be subtlety: the iterator must be incremented before the loop + // body (not sure why), so a range-for loop won't work here. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *I = BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not @@ -63,16 +68,23 @@ namespace { continue; // Don't waste time simplifying unused instructions. if (!I->use_empty()) - if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) { + if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) { // Mark all uses for resimplification next time round the loop. - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) - Next->insert(cast<Instruction>(*UI)); + for (User *U : I->users()) + Next->insert(cast<Instruction>(U)); I->replaceAllUsesWith(V); ++NumSimplified; Changed = true; } - Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + if (res) { + // RecursivelyDeleteTriviallyDeadInstruction can remove + // more than one instruction, so simply incrementing the + // iterator does not work. When instructions get deleted + // re-iterate instead. + BI = BB->begin(); BE = BB->end(); + Changed |= res; + } } // Place the list of instructions to simplify on the next loop iteration diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 15b3e66..3b61bb5 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -43,7 +44,7 @@ namespace { class LibCallOptimization { protected: Function *Caller; - const DataLayout *TD; + const DataLayout *DL; const TargetLibraryInfo *TLI; const LibCallSimplifier *LCS; LLVMContext* Context; @@ -63,11 +64,11 @@ public: /// change the calling convention. 
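// ---- [illustrative aside, not part of the patch] ----
// On the "not sure why" comment in the hunk above: the increment must come
// before the loop body because the body may erase *I, and advancing an
// iterator that points at an erased instruction is a use-after-free. The
// safe shape is:
//   for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
//     Instruction *I = BI++;  // step past I before it can be deleted
//     ...simplify, and possibly erase, I...
//   }
// A range-for performs its increment after the body runs, so it cannot be
// used safely when the body deletes the current element.
// ---- [end aside] ----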
virtual bool ignoreCallingConv() { return false; } - Value *optimizeCall(CallInst *CI, const DataLayout *TD, + Value *optimizeCall(CallInst *CI, const DataLayout *DL, const TargetLibraryInfo *TLI, const LibCallSimplifier *LCS, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); - this->TD = TD; + this->DL = DL; this->TLI = TLI; this->LCS = LCS; if (CI->getCalledFunction()) @@ -75,7 +76,7 @@ public: // We never change the calling convention. if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C) - return NULL; + return nullptr; return callOptimizer(CI->getCalledFunction(), CI, B); } @@ -88,9 +89,8 @@ public: /// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. static bool isOnlyUsedInZeroEqualityComparison(Value *V) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) if (IC->isEquality()) if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) if (C->isNullValue()) @@ -104,9 +104,8 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) { /// isOnlyUsedInEqualityComparison - Return true if it is only used in equality /// comparisons with With. static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) if (IC->isEquality() && IC->getOperand(1) == With) continue; // Unknown instruction. @@ -152,7 +151,8 @@ protected: struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization { CallInst *CI; - bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { + bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, + bool isString) const override { if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) return true; if (ConstantInt *SizeCI = @@ -175,7 +175,8 @@ struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization { }; struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); LLVMContext &Context = CI->getParent()->getContext(); @@ -184,21 +185,22 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(Context) || + FT->getParamType(3) != DL->getIntPtrType(Context)) + return nullptr; if (isFoldable(3, 2, false)) { B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); LLVMContext &Context = CI->getParent()->getContext(); @@ -207,21 +209,22 @@ 
struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(Context) || + FT->getParamType(3) != DL->getIntPtrType(Context)) + return nullptr; if (isFoldable(3, 2, false)) { B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; struct MemSetChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); LLVMContext &Context = CI->getParent()->getContext(); @@ -230,9 +233,9 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization { if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(Context) || + FT->getParamType(3) != DL->getIntPtrType(Context)) + return nullptr; if (isFoldable(3, 2, false)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), @@ -240,12 +243,13 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization { B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; StringRef Name = Callee->getName(); FunctionType *FT = Callee->getFunctionType(); @@ -256,8 +260,8 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != TD->getIntPtrType(Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(Context)) + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // __strcpy_chk(x,x) -> x return Src; @@ -269,28 +273,29 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. if (isFoldable(2, 1, true)) { - Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6)); return Ret; } else { // Maybe we can still fold __strcpy_chk to __memcpy_chk. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // This optimization requires DataLayout.
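// ---- [illustrative aside, not part of the patch] ----
// The common shape of the *_chk folds in this file, at source level
// (sketch; the helper is hypothetical). When isFoldable proves that the
// known object size covers the copy length, the fortified call collapses
// to the plain libc operation and the runtime check disappears:
#include <cstring>
static void *memcpyChkFolded(void *Dst, const void *Src, size_t Len) {
  // __memcpy_chk(Dst, Src, Len, ObjSize) with a proven ObjSize >= Len:
  return memcpy(Dst, Src, Len);
}
// ---- [end aside] ----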
- if (!TD) return 0; + if (!DL) return nullptr; Value *Ret = EmitMemCpyChk(Dst, Src, - ConstantInt::get(TD->getIntPtrType(Context), Len), - CI->getArgOperand(2), B, TD, TLI); + ConstantInt::get(DL->getIntPtrType(Context), Len), + CI->getArgOperand(2), B, DL, TLI); return Ret; } - return 0; + return nullptr; } }; struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; StringRef Name = Callee->getName(); FunctionType *FT = Callee->getFunctionType(); @@ -301,13 +306,13 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) - return 0; + FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0))) + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) - Value *StrLen = EmitStrLen(Src, B, TD, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + Value *StrLen = EmitStrLen(Src, B, DL, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; } // If a) we don't have any length information, or b) we know this will @@ -316,31 +321,32 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. if (isFoldable(2, 1, true)) { - Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6)); return Ret; } else { // Maybe we can still fold __stpcpy_chk to __memcpy_chk. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // This optimization requires DataLayout.
- if (!TD) return 0; + if (!DL) return nullptr; Type *PT = FT->getParamType(0); - Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); Value *DstEnd = B.CreateGEP(Dst, - ConstantInt::get(TD->getIntPtrType(PT), + ConstantInt::get(DL->getIntPtrType(PT), Len - 1)); - if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI)) - return 0; + if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI)) + return nullptr; return DstEnd; } - return 0; + return nullptr; } }; struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; StringRef Name = Callee->getName(); FunctionType *FT = Callee->getFunctionType(); @@ -351,16 +357,16 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || !FT->getParamType(2)->isIntegerTy() || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return 0; + FT->getParamType(3) != DL->getIntPtrType(Context)) + return nullptr; if (isFoldable(3, 2, false)) { Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TD, TLI, + CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7)); return Ret; } - return 0; + return nullptr; } }; @@ -369,14 +375,15 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { //===----------------------------------------------------------------------===// struct StrCatOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strcat" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType()) - return 0; + return nullptr; // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); @@ -384,7 +391,7 @@ struct StrCatOpt : public LibCallOptimization { // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; --Len; // Unbias length. // Handle the simple, do-nothing case: strcat(x, "") -> x @@ -392,7 +399,7 @@ struct StrCatOpt : public LibCallOptimization { return Dst; // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; return emitStrLenMemCpy(Src, Dst, Len, B); } @@ -401,9 +408,9 @@ struct StrCatOpt : public LibCallOptimization { IRBuilder<> &B) { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = EmitStrLen(Dst, B, TD, TLI); + Value *DstLen = EmitStrLen(Dst, B, DL, TLI); if (!DstLen) - return 0; + return nullptr; // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -413,13 +420,14 @@ struct StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
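// ---- [illustrative aside, not part of the patch] ----
// emitStrLenMemCpy's effect in plain C (sketch; the helper is
// hypothetical). With the source length a known constant, strcat becomes
// one strlen of the destination plus a memcpy of SrcLen + 1 bytes, the +1
// carrying the nul terminator along:
#include <cstring>
static char *strcatFolded(char *Dst, const char *Src, size_t SrcLen) {
  memcpy(Dst + strlen(Dst), Src, SrcLen + 1);
  return Dst;
}
// ---- [end aside] ----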
B.CreateMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + ConstantInt::get(DL->getIntPtrType(*Context), Len + 1), 1); return Dst; } }; struct StrNCatOpt : public StrCatOpt { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strncat" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || @@ -427,7 +435,7 @@ struct StrNCatOpt : public StrCatOpt { FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); @@ -438,11 +446,11 @@ struct StrNCatOpt : public StrCatOpt { if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Len = LengthArg->getZExtValue(); else - return 0; + return nullptr; // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) return 0; + if (SrcLen == 0) return nullptr; --SrcLen; // Unbias length. // Handle the simple, do-nothing cases: @@ -451,10 +459,10 @@ struct StrNCatOpt : public StrCatOpt { if (SrcLen == 0 || Len == 0) return Dst; // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // We don't optimize this case - if (Len < SrcLen) return 0; + if (Len < SrcLen) return nullptr; // strncat(x, s, c) -> strcat(x, s) // s is constant so the strcat can be optimized further @@ -463,38 +471,42 @@ struct StrNCatOpt : public StrCatOpt { }; struct StrChrOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strchr" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || !FT->getParamType(1)->isIntegerTy(32)) - return 0; + return nullptr; Value *SrcStr = CI->getArgOperand(0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - if (CharC == 0) { + if (!CharC) { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; uint64_t Len = GetStringLength(SrcStr); if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. - return 0; + return nullptr; return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. - ConstantInt::get(TD->getIntPtrType(*Context), Len), - B, TD, TLI); + ConstantInt::get(DL->getIntPtrType(*Context), Len), + B, DL, TLI); } // Otherwise, the character is a constant, see if the first argument is // a string literal. If so, we can constant fold. StringRef Str; - if (!getConstantStringInfo(SrcStr, Str)) - return 0; + if (!getConstantStringInfo(SrcStr, Str)) { + if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p) + return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); + return nullptr; + } // Compute the offset, make sure to handle the case when we're searching for // zero (a weird way to spell strlen). 
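// ---- [illustrative aside, not part of the patch] ----
// The new fold added above for a non-constant haystack, in plain C
// (sketch): searching a string for its terminator is exactly strlen.
#include <cstring>
static char *strchrZeroFolded(char *P) {
  return P + strlen(P); // strchr(p, '\0') == p + strlen(p)
}
// ---- [end aside] ----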
@@ -509,28 +521,29 @@ struct StrChrOpt : public LibCallOptimization { }; struct StrRChrOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strrchr" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || !FT->getParamType(1)->isIntegerTy(32)) - return 0; + return nullptr; Value *SrcStr = CI->getArgOperand(0); ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); // Cannot fold anything if we're not looking for a constant. if (!CharC) - return 0; + return nullptr; StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) - if (TD && CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, TD, TLI); - return 0; + if (DL && CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, DL, TLI); + return nullptr; } // Compute the offset. @@ -545,14 +558,15 @@ struct StrRChrOpt : public LibCallOptimization { }; struct StrCmpOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strcmp" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strcmp(x,x) -> 0 @@ -578,19 +592,20 @@ struct StrCmpOpt : public LibCallOptimization { uint64_t Len2 = GetStringLength(Str2P); if (Len1 && Len2) { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(TD->getIntPtrType(*Context), - std::min(Len1, Len2)), B, TD, TLI); + ConstantInt::get(DL->getIntPtrType(*Context), + std::min(Len1, Len2)), B, DL, TLI); } - return 0; + return nullptr; } }; struct StrNCmpOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strncmp" function prototype. 
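// ---- [illustrative aside, not part of the patch] ----
// Why the strcmp -> memcmp fold above may compare only min(Len1, Len2)
// bytes, where the lengths include the nul: if one string is a proper
// prefix of the other, its terminator lies inside the compared span and
// forces a difference, so the shorter span already decides the ordering.
// Sketch (hypothetical helper):
#include <cstring>
static int strcmpAsMemcmp(const char *A, size_t LenA,   // LenA = strlen(A)+1
                          const char *B, size_t LenB) { // LenB = strlen(B)+1
  return memcmp(A, B, LenA < LenB ? LenA : LenB);
}
// ---- [end aside] ----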
FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || @@ -598,7 +613,7 @@ struct StrNCmpOpt : public LibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 @@ -609,13 +624,13 @@ struct StrNCmpOpt : public LibCallOptimization { if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Length = LengthArg->getZExtValue(); else - return 0; + return nullptr; if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); - if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); + if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -635,66 +650,68 @@ struct StrNCmpOpt : public LibCallOptimization { if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - return 0; + return nullptr; } }; struct StrCpyOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strcpy" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + ConstantInt::get(DL->getIntPtrType(*Context), Len), 1); return Dst; } }; struct StpCpyOpt: public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "stpcpy" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) - Value *StrLen = EmitStrLen(Src, B, TD, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + Value *StrLen = EmitStrLen(Src, B, DL, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; } // See if we can get the length of the input string. 
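Both copy optimizers above require DataLayout for the intptr type; once the source length is known, the copy degrades to a fixed-size memcpy that includes the nul. Illustrative effect (stpcpy is POSIX, not ISO C):

    #include <cstring>

    void copy_lit(char *dst) { strcpy(dst, "hi"); }   // -> memcpy(dst, "hi", 3)
    char *stp_self(char *x)  { return stpcpy(x, x); } // -> x + strlen(x)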
uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; Type *PT = FT->getParamType(0); - Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); Value *DstEnd = B.CreateGEP(Dst, - ConstantInt::get(TD->getIntPtrType(PT), + ConstantInt::get(DL->getIntPtrType(PT), Len - 1)); // We have enough information to now generate the memcpy call to do the @@ -705,13 +722,14 @@ struct StpCpyOpt: public LibCallOptimization { }; struct StrNCpyOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); @@ -719,7 +737,7 @@ struct StrNCpyOpt : public LibCallOptimization { // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) return 0; + if (SrcLen == 0) return nullptr; --SrcLen; if (SrcLen == 0) { @@ -732,33 +750,34 @@ struct StrNCpyOpt : public LibCallOptimization { if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp)) Len = LengthArg->getZExtValue(); else - return 0; + return nullptr; if (Len == 0) return Dst; // strncpy(x, y, 0) -> x // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // Let strncpy handle the zero padding - if (Len > SrcLen+1) return 0; + if (Len > SrcLen+1) return nullptr; Type *PT = FT->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] B.CreateMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(PT), Len), 1); + ConstantInt::get(DL->getIntPtrType(PT), Len), 1); return Dst; } }; struct StrLenOpt : public LibCallOptimization { - virtual bool ignoreCallingConv() { return true; } - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + bool ignoreCallingConv() override { return true; } + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; Value *Src = CI->getArgOperand(0); @@ -766,22 +785,38 @@ struct StrLenOpt : public LibCallOptimization { if (uint64_t Len = GetStringLength(Src)) return ConstantInt::get(CI->getType(), Len-1); + // strlen(x?"foo":"bars") --> x ? 
3 : 4 + if (SelectInst *SI = dyn_cast<SelectInst>(Src)) { + uint64_t LenTrue = GetStringLength(SI->getTrueValue()); + uint64_t LenFalse = GetStringLength(SI->getFalseValue()); + if (LenTrue && LenFalse) { + emitOptimizationRemark(*Context, "simplify-libcalls", *Caller, + SI->getDebugLoc(), + "folded strlen(select) to select of constants"); + return B.CreateSelect(SI->getCondition(), + ConstantInt::get(CI->getType(), LenTrue-1), + ConstantInt::get(CI->getType(), LenFalse-1)); + } + } + // strlen(x) != 0 --> *x != 0 // strlen(x) == 0 --> *x == 0 if (isOnlyUsedInZeroEqualityComparison(CI)) return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); - return 0; + + return nullptr; } }; struct StrPBrkOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || FT->getReturnType() != FT->getParamType(0)) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -802,20 +837,21 @@ struct StrPBrkOpt : public LibCallOptimization { } // strpbrk(s, "a") -> strchr(s, 'a') - if (TD && HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); + if (DL && HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI); - return 0; + return nullptr; } }; struct StrToOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy()) - return 0; + return nullptr; Value *EndPtr = CI->getArgOperand(1); if (isa<ConstantPointerNull>(EndPtr)) { @@ -824,18 +860,19 @@ struct StrToOpt : public LibCallOptimization { CI->addAttribute(1, Attribute::NoCapture); } - return 0; + return nullptr; } }; struct StrSpnOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -853,18 +890,19 @@ struct StrSpnOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), Pos); } - return 0; + return nullptr; } }; struct StrCSpnOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -882,37 +920,37 @@ struct StrCSpnOpt : public LibCallOptimization { } // strcspn(s, "") -> strlen(s) - if (TD && 
HasS2 && S2.empty()) - return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); + if (DL && HasS2 && S2.empty()) + return EmitStrLen(CI->getArgOperand(0), B, DL, TLI); - return 0; + return nullptr; } }; struct StrStrOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isPointerTy()) - return 0; + return nullptr; // fold strstr(x, x) -> x. if (CI->getArgOperand(0) == CI->getArgOperand(1)) return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 - if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI); if (!StrLen) - return 0; + return nullptr; Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), - StrLen, B, TD, TLI); + StrLen, B, DL, TLI); if (!StrNCmp) - return 0; - for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); - UI != UE; ) { + return nullptr; + for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) { ICmpInst *Old = cast<ICmpInst>(*UI++); Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, ConstantInt::getNullValue(StrNCmp->getType()), @@ -946,20 +984,21 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); - return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; } - return 0; + return nullptr; } }; struct MemCmpOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy(32)) - return 0; + return nullptr; Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); @@ -968,7 +1007,7 @@ struct MemCmpOpt : public LibCallOptimization { // Make sure we have a constant length. ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!LenC) return 0; + if (!LenC) return nullptr; uint64_t Len = LenC->getZExtValue(); if (Len == 0) // memcmp(s1,s2,0) -> 0 @@ -989,7 +1028,7 @@ struct MemCmpOpt : public LibCallOptimization { getConstantStringInfo(RHS, RHSStr)) { // Make sure we're not reading out-of-bounds memory. if (Len > LHSStr.size() || Len > RHSStr.size()) - return 0; + return nullptr; // Fold the memcmp and normalize the result. This way we get consistent // results across multiple platforms. 
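A hedged source-level view of the StrCSpnOpt and StrStrOpt folds above (names invented for the example):

    #include <cstring>

    size_t none_of(const char *s) { return strcspn(s, ""); }       // -> strlen(s)
    const char *first_y(const char *x) { return strstr(x, "y"); }  // -> strchr(x, 'y')
    bool starts_with(const char *a, const char *b) {
      // Result only used in an equality compare against 'a', so per the
      // hunk above this becomes strncmp(a, b, strlen(b)) == 0:
      return strstr(a, b) == a;
    }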
uint64_t Ret = 0; @@ -1001,21 +1040,22 @@ struct MemCmpOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), Ret); } - return 0; + return nullptr; } }; struct MemCpyOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(*Context)) + return nullptr; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), @@ -1025,16 +1065,17 @@ struct MemCpyOpt : public LibCallOptimization { }; struct MemMoveOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(*Context)) + return nullptr; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), @@ -1044,16 +1085,17 @@ struct MemMoveOpt : public LibCallOptimization { }; struct MemSetOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) - return 0; + FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0))) + return nullptr; // memset(p, v, n) -> llvm.memset(p, v, n, 1) Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); @@ -1072,26 +1114,26 @@ struct MemSetOpt : public LibCallOptimization { struct UnaryDoubleFPOpt : public LibCallOptimization { bool CheckRetType; UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || !FT->getParamType(0)->isDoubleTy()) - return 0; + return nullptr; if (CheckRetType) { // Check if all the uses for function like 'sin' are converted to float. 
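The three mem* optimizers above simply re-emit the libcall as the matching LLVM intrinsic through IRBuilder. A minimal sketch of that pattern, mirroring MemSetOpt (fragment, not self-contained; assumes the in-scope IRBuilder<> B and the CallInst *CI being visited):

    // memset(p, v, n) -> llvm.memset(p, v, n, align 1)
    Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
    B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
    return CI->getArgOperand(0);  // memset returns its first argument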
- for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); - ++UseI) { - FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); - if (Cast == 0 || !Cast->getType()->isFloatTy()) - return 0; + for (User *U : CI->users()) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(U); + if (!Cast || !Cast->getType()->isFloatTy()) + return nullptr; } } // If this is something like 'floor((double)floatval)', convert to floorf. FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) - return 0; + if (!Cast || !Cast->getOperand(0)->getType()->isFloatTy()) + return nullptr; // floor((double)floatval) -> (double)floorf(floatval) Value *V = Cast->getOperand(0); @@ -1100,6 +1142,49 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { } }; +// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax' +struct BinaryDoubleFPOpt : public LibCallOptimization { + bool CheckRetType; + BinaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 2 arguments of the same FP type, which match the + // result type. + if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + !FT->getParamType(0)->isFloatingPointTy()) + return nullptr; + + if (CheckRetType) { + // Check if all the uses for function like 'fmin/fmax' are converted to + // float. + for (User *U : CI->users()) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(U); + if (!Cast || !Cast->getType()->isFloatTy()) + return nullptr; + } + } + + // If this is something like 'fmin((double)floatval1, (double)floatval2)', + // we convert it to fminf. 
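The new BinaryDoubleFPOpt extends the unary shrinking above to two-argument calls: if both operands are fpext from float and every use of the double result is an fptrunc back to float, the call is replaced by the float variant. Caller-side illustration:

    #include <cmath>

    float min2(float a, float b) {
      // (float)fmin((double)a, (double)b) shrinks to fminf(a, b):
      return (float)fmin((double)a, (double)b);
    }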
+ FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1)); + if (!Cast1 || !Cast1->getOperand(0)->getType()->isFloatTy() || + !Cast2 || !Cast2->getOperand(0)->getType()->isFloatTy()) + return nullptr; + + // fmin((double)floatval1, (double)floatval2) + // -> (double)fmin(floatval1, floatval2) + Value *V = nullptr; + Value *V1 = Cast1->getOperand(0); + Value *V2 = Cast2->getOperand(0); + V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B, + Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; + struct UnsafeFPLibCallOptimization : public LibCallOptimization { bool UnsafeFPShrink; UnsafeFPLibCallOptimization(bool UnsafeFPShrink) { @@ -1109,8 +1194,9 @@ struct UnsafeFPLibCallOptimization : public LibCallOptimization { struct CosOpt : public UnsafeFPLibCallOptimization { CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - Value *Ret = NULL; + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "cos" && TLI->has(LibFunc::cosf)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1136,8 +1222,9 @@ struct CosOpt : public UnsafeFPLibCallOptimization { struct PowOpt : public UnsafeFPLibCallOptimization { PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - Value *Ret = NULL; + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "pow" && TLI->has(LibFunc::powf)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1162,10 +1249,16 @@ struct PowOpt : public UnsafeFPLibCallOptimization { hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, LibFunc::exp2l)) return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); + // pow(10.0, x) -> exp10(x) + if (Op1C->isExactlyValue(10.0) && + hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f, + LibFunc::exp10l)) + return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B, + Callee->getAttributes()); } ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (Op2C == 0) return Ret; + if (!Op2C) return Ret; if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); @@ -1198,14 +1291,15 @@ struct PowOpt : public UnsafeFPLibCallOptimization { if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); - return 0; + return nullptr; } }; struct Exp2Opt : public UnsafeFPLibCallOptimization { Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - Value *Ret = NULL; + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "exp2" && TLI->has(LibFunc::exp2f)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1222,37 +1316,37 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 - Value *LdExpArg = 0; - if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { - if 
(OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); - } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { - if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); - } + LibFunc::Func LdExp = LibFunc::ldexpl; + if (Op->getType()->isFloatTy()) + LdExp = LibFunc::ldexpf; + else if (Op->getType()->isDoubleTy()) + LdExp = LibFunc::ldexp; + + if (TLI->has(LdExp)) { + Value *LdExpArg = nullptr; + if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) + LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); + } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) + LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); + } - if (LdExpArg) { - const char *Name; - if (Op->getType()->isFloatTy()) - Name = "ldexpf"; - else if (Op->getType()->isDoubleTy()) - Name = "ldexp"; - else - Name = "ldexpl"; - - Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); - if (!Op->getType()->isFloatTy()) - One = ConstantExpr::getFPExtend(One, Op->getType()); - - Module *M = Caller->getParent(); - Value *Callee = M->getOrInsertFunction(Name, Op->getType(), - Op->getType(), - B.getInt32Ty(), NULL); - CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); - if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); + if (LdExpArg) { + Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); + if (!Op->getType()->isFloatTy()) + One = ConstantExpr::getFPExtend(One, Op->getType()); - return CI; + Module *M = Caller->getParent(); + Value *Callee = + M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), + Op->getType(), B.getInt32Ty(), NULL); + CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; + } } return Ret; } @@ -1261,11 +1355,12 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { struct SinCosPiOpt : public LibCallOptimization { SinCosPiOpt() {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Make sure the prototype is as expected, otherwise the rest of the // function is probably invalid and likely to abort. if (!isTrigLibCall(CI)) - return 0; + return nullptr; Value *Arg = CI->getArgOperand(0); SmallVector<CallInst *, 1> SinCalls; @@ -1277,14 +1372,13 @@ struct SinCosPiOpt : public LibCallOptimization { // Look for all compatible sinpi, cospi and sincospi calls with the same // argument. If there are enough (in some sense) we can make the // substitution. - for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ++UI) - classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls, + for (User *U : Arg->users()) + classifyArgUse(U, CI->getParent(), IsFloat, SinCalls, CosCalls, SinCosCalls); // It's only worthwhile if both sinpi and cospi are actually used. 
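The rewritten Exp2Opt above now asks TargetLibraryInfo for the right ldexp flavor (ldexpf/ldexp/ldexpl) and only fires when the target actually has it; the fold itself is unchanged. Illustratively, for a signed integer of at most 32 bits:

    #include <cmath>

    double pow2(int i) {
      return exp2((double)i);   // -> ldexp(1.0, i)
    }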
if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) - return 0; + return nullptr; Value *Sin, *Cos, *SinCos; insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, @@ -1294,7 +1388,7 @@ struct SinCosPiOpt : public LibCallOptimization { replaceTrigInsts(CosCalls, Cos); replaceTrigInsts(SinCosCalls, SinCos); - return 0; + return nullptr; } bool isTrigLibCall(CallInst *CI) { @@ -1334,7 +1428,7 @@ struct SinCosPiOpt : public LibCallOptimization { SinCalls.push_back(CI); else if (Func == LibFunc::cospif) CosCalls.push_back(CI); - else if (Func == LibFunc::sincospi_stretf) + else if (Func == LibFunc::sincospif_stret) SinCosCalls.push_back(CI); } else { if (Func == LibFunc::sinpi) @@ -1363,7 +1457,7 @@ struct SinCosPiOpt : public LibCallOptimization { Triple T(OrigCallee->getParent()->getTargetTriple()); if (UseFloat) { - Name = "__sincospi_stretf"; + Name = "__sincospif_stret"; assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); // x86_64 can't use {float, float} since that would be returned in both @@ -1412,14 +1506,15 @@ struct SinCosPiOpt : public LibCallOptimization { //===----------------------------------------------------------------------===// struct FFSOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy(32) || !FT->getParamType(0)->isIntegerTy()) - return 0; + return nullptr; Value *Op = CI->getArgOperand(0); @@ -1445,13 +1540,14 @@ struct FFSOpt : public LibCallOptimization { }; struct AbsOpt : public LibCallOptimization { - virtual bool ignoreCallingConv() { return true; } - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + bool ignoreCallingConv() override { return true; } + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); // We require integer(integer) where the types agree. if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || FT->getParamType(0) != FT->getReturnType()) - return 0; + return nullptr; // abs(x) -> x >s -1 ? 
x : -x Value *Op = CI->getArgOperand(0); @@ -1463,12 +1559,13 @@ struct AbsOpt : public LibCallOptimization { }; struct IsDigitOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // isdigit(c) -> (c-'0') <u 10 Value *Op = CI->getArgOperand(0); @@ -1479,12 +1576,13 @@ struct IsDigitOpt : public LibCallOptimization { }; struct IsAsciiOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // isascii(c) -> c <u 128 Value *Op = CI->getArgOperand(0); @@ -1494,12 +1592,13 @@ struct IsAsciiOpt : public LibCallOptimization { }; struct ToAsciiOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // toascii(c) -> c & 0x7f return B.CreateAnd(CI->getArgOperand(0), @@ -1514,7 +1613,8 @@ struct ToAsciiOpt : public LibCallOptimization { struct ErrorReportingOpt : public LibCallOptimization { ErrorReportingOpt(int S = -1) : StreamArg(S) {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &) override { // Error reporting calls should be cold, mark them as such. // This applies even to non-builtin calls: it is only a hint and applies to // functions that the frontend might not understand as builtins. @@ -1528,7 +1628,7 @@ struct ErrorReportingOpt : public LibCallOptimization { CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); } - return 0; + return nullptr; } protected: @@ -1565,7 +1665,7 @@ struct PrintFOpt : public LibCallOptimization { // Check for a fixed format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) - return 0; + return nullptr; // Empty format string -> noop. if (FormatStr.empty()) // Tolerate printf's declared void. @@ -1576,11 +1676,11 @@ struct PrintFOpt : public LibCallOptimization { // is used, in general the printf return value is not compatible with either // putchar() or puts(). if (!CI->use_empty()) - return 0; + return nullptr; // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI); + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1592,7 +1692,7 @@ struct PrintFOpt : public LibCallOptimization { // pass to be run after this pass, to merge duplicate strings. 
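Several of the integer folds above are simple strength reductions; together with the first printf case they look like this from the caller's side (illustrative):

    #include <cctype>
    #include <cstdio>

    int d(int c) { return isdigit(c); }   // -> (unsigned)(c - '0') < 10
    int a(int c) { return isascii(c); }   // -> (unsigned)c < 128
    int t(int c) { return toascii(c); }   // -> c & 0x7f
    void x()     { printf("x"); }         // -> putchar('x'), result unused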
FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - Value *NewCI = EmitPutS(GV, B, TD, TLI); + Value *NewCI = EmitPutS(GV, B, DL, TLI); return (CI->use_empty() || !NewCI) ? NewCI : ConstantInt::get(CI->getType(), FormatStr.size()+1); @@ -1602,7 +1702,7 @@ struct PrintFOpt : public LibCallOptimization { // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); + Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); @@ -1611,18 +1711,19 @@ struct PrintFOpt : public LibCallOptimization { // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) { - return EmitPutS(CI->getArgOperand(1), B, TD, TLI); + return EmitPutS(CI->getArgOperand(1), B, DL, TLI); } - return 0; + return nullptr; } - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Require one fixed pointer argument and an integer/void result. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) - return 0; + return nullptr; if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1639,7 +1740,7 @@ struct PrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; @@ -1649,7 +1750,7 @@ struct SPrintFOpt : public LibCallOptimization { // Check for a fixed format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; + return nullptr; // If we just have a format string (nothing else crazy) transform it. if (CI->getNumArgOperands() == 2) { @@ -1657,14 +1758,14 @@ struct SPrintFOpt : public LibCallOptimization { // %% -> % in the future if we cared. for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') - return 0; // we found a format specifier, bail out. + return nullptr; // we found a format specifier, bail out. // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), // Copy the + ConstantInt::get(DL->getIntPtrType(*Context), // Copy the FormatStr.size() + 1), 1); // nul byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1673,12 +1774,12 @@ struct SPrintFOpt : public LibCallOptimization { // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumArgOperands() < 3) - return 0; + return nullptr; // Decode the second character of the format string. 
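The format-string-only sprintf case above needs DataLayout and lowers to a memcpy of the literal plus its nul, with the return value folded to the length. Sketch:

    #include <cstdio>

    int fill(char *dst) {
      return sprintf(dst, "done");  // -> memcpy(dst, "done", 5); result folds to 4
    }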
if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); @@ -1690,14 +1791,14 @@ struct SPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 's') { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; - Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI); if (!Len) - return 0; + return nullptr; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); @@ -1706,16 +1807,17 @@ struct SPrintFOpt : public LibCallOptimization { // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); } - return 0; + return nullptr; } - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Require two fixed pointer arguments and an integer result. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1732,7 +1834,7 @@ struct SPrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; @@ -1745,58 +1847,59 @@ struct FPrintFOpt : public LibCallOptimization { // All the optimizations depend on the format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; + return nullptr; // Do not do any of the following transformations if the fprintf return // value is used, in general the fprintf return value is not compatible // with fwrite(), fputc() or fputs(). if (!CI->use_empty()) - return 0; + return nullptr; // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') // Could handle %% -> % if we cared. - return 0; // We found a format specifier. + return nullptr; // We found a format specifier. // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; return EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), + ConstantInt::get(DL->getIntPtrType(*Context), FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); + CI->getArgOperand(0), B, DL, TLI); } // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumArgOperands() < 3) - return 0; + return nullptr; // Decode the second character of the format string. 
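Source-level view of the two sprintf specifier cases above (illustrative):

    #include <cstdio>
    #include <cstring>

    int one_char(char *dst, int c) {
      return sprintf(dst, "%c", c);  // -> store (char)c, then a nul; result 1
    }
    int copy_str(char *dst, const char *s) {
      return sprintf(dst, "%s", s);  // -> memcpy(dst, s, strlen(s)+1); result strlen(s)
    }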
if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; + return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy()) - return 0; - return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); + return nullptr; + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); } - return 0; + return nullptr; } - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Require two fixed paramters as pointers and integer result. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1813,12 +1916,13 @@ struct FPrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; struct FWriteOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { ErrorReportingOpt ER(/* StreamArg = */ 3); (void) ER.callOptimizer(Callee, CI, B); @@ -1829,12 +1933,12 @@ struct FWriteOpt : public LibCallOptimization { !FT->getParamType(2)->isIntegerTy() || !FT->getParamType(3)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; // Get the element size and count. ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!SizeC || !CountC) return 0; + if (!SizeC || !CountC) return nullptr; uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); // If this is writing zero records, remove the call (it's a noop). @@ -1845,61 +1949,63 @@ struct FWriteOpt : public LibCallOptimization { // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; } - return 0; + return nullptr; } }; struct FPutsOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { ErrorReportingOpt ER(/* StreamArg = */ 1); (void) ER.callOptimizer(Callee, CI, B); // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // Require two pointers. Also, we can't optimize if return value is used. 
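The fprintf and fwrite folds above only apply when the call's result is unused, which void contexts guarantee; illustrative:

    #include <cstdio>

    void log_char(FILE *f, int c)         { fprintf(f, "%c", c); }  // -> fputc(c, f)
    void log_str(FILE *f, const char *s)  { fprintf(f, "%s", s); }  // -> fputs(s, f)
    void one_byte(FILE *f, const char *s) { fwrite(s, 1, 1, f); }   // -> fputc(s[0], f)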
FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !CI->use_empty()) - return 0; + return nullptr; // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); - if (!Len) return 0; + if (!Len) return nullptr; // Known to have no uses (see above). return EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getArgOperand(1), B, TD, TLI); + ConstantInt::get(DL->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, DL, TLI); } }; struct PutsOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Require one fixed pointer argument and an integer/void result. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) - return 0; + return nullptr; // Check for a constant string. StringRef Str; if (!getConstantStringInfo(CI->getArgOperand(0), Str)) - return 0; + return nullptr; if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); + Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } - return 0; + return nullptr; } }; @@ -1908,7 +2014,7 @@ struct PutsOpt : public LibCallOptimization { namespace llvm { class LibCallSimplifierImpl { - const DataLayout *TD; + const DataLayout *DL; const TargetLibraryInfo *TLI; const LibCallSimplifier *LCS; bool UnsafeFPShrink; @@ -1918,11 +2024,11 @@ class LibCallSimplifierImpl { PowOpt Pow; Exp2Opt Exp2; public: - LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, + LibCallSimplifierImpl(const DataLayout *DL, const TargetLibraryInfo *TLI, const LibCallSimplifier *LCS, bool UnsafeFPShrink = false) : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { - this->TD = TD; + this->DL = DL; this->TLI = TLI; this->LCS = LCS; this->UnsafeFPShrink = UnsafeFPShrink; @@ -1975,6 +2081,7 @@ static MemSetOpt MemSet; // Math library call optimizations. 
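All of the optimizers above are driven through the pimpl'd LibCallSimplifier whose DataLayout member this hunk renames. A hedged sketch of the typical driver pattern (DL, TLI, and CI are assumed to come from the enclosing pass; this is not code from the patch):

    LibCallSimplifier Simplifier(DL, TLI, /*UnsafeFPShrink=*/false);
    if (Value *With = Simplifier.optimizeCall(CI)) {
      CI->replaceAllUsesWith(With);  // plain Value RAUW; a pass can instead go
      CI->eraseFromParent();         // through LibCallSimplifier::replaceAllUsesWith
    }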
static UnaryDoubleFPOpt UnaryDoubleFP(false); +static BinaryDoubleFPOpt BinaryDoubleFP(false); static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); static SinCosPiOpt SinCosPi; @@ -2009,7 +2116,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case Intrinsic::exp2: return &Exp2; default: - return 0; + return nullptr; } } @@ -2119,7 +2226,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::trunc: if (hasFloatVersion(FuncName)) return &UnaryDoubleFP; - return 0; + return nullptr; case LibFunc::acos: case LibFunc::acosh: case LibFunc::asin: @@ -2143,11 +2250,16 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::tanh: if (UnsafeFPShrink && hasFloatVersion(FuncName)) return &UnsafeUnaryDoubleFP; - return 0; + return nullptr; + case LibFunc::fmin: + case LibFunc::fmax: + if (hasFloatVersion(FuncName)) + return &BinaryDoubleFP; + return nullptr; case LibFunc::memcpy_chk: return &MemCpyChk; default: - return 0; + return nullptr; } } @@ -2167,7 +2279,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { return &StrNCpyChk; } - return 0; + return nullptr; } @@ -2175,15 +2287,15 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { LibCallOptimization *LCO = lookupOptimization(CI); if (LCO) { IRBuilder<> Builder(CI); - return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); + return LCO->optimizeCall(CI, DL, TLI, LCS, Builder); } - return 0; + return nullptr; } -LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, +LibCallSimplifier::LibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI, bool UnsafeFPShrink) { - Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink); + Impl = new LibCallSimplifierImpl(DL, TLI, this, UnsafeFPShrink); } LibCallSimplifier::~LibCallSimplifier() { @@ -2191,7 +2303,7 @@ LibCallSimplifier::~LibCallSimplifier() { } Value *LibCallSimplifier::optimizeCall(CallInst *CI) { - if (CI->isNoBuiltin()) return 0; + if (CI->isNoBuiltin()) return nullptr; return Impl->optimizeCall(CI); } @@ -2242,8 +2354,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { // * sqrt(Nroot(x)) -> pow(x,1/(2*N)) // * sqrt(pow(x,y)) -> pow(|x|,y*0.5) // -// strchr: -// * strchr(p, 0) -> strlen(p) // tan, tanf, tanl: // * tan(atan(x)) -> x // diff --git a/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp b/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp deleted file mode 100644 index 2ef692c..0000000 --- a/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp +++ /dev/null @@ -1,222 +0,0 @@ -//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions or global -// variables, or to instrument some functions or global variables in a specific -// way, based on a user-supplied list. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/SpecialCaseList.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" -#include <string> -#include <utility> - -namespace llvm { - -/// Represents a set of regular expressions. Regular expressions which are -/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all -/// others are represented as a single pipe-separated regex in RegEx. The -/// reason for doing so is efficiency; StringSet is much faster at matching -/// literal strings than Regex. -struct SpecialCaseList::Entry { - StringSet<> Strings; - Regex *RegEx; - - Entry() : RegEx(0) {} - - bool match(StringRef Query) const { - return Strings.count(Query) || (RegEx && RegEx->match(Query)); - } -}; - -SpecialCaseList::SpecialCaseList() : Entries() {} - -SpecialCaseList *SpecialCaseList::create( - const StringRef Path, std::string &Error) { - if (Path.empty()) - return new SpecialCaseList(); - OwningPtr<MemoryBuffer> File; - if (error_code EC = MemoryBuffer::getFile(Path, File)) { - Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); - return 0; - } - return create(File.get(), Error); -} - -SpecialCaseList *SpecialCaseList::create( - const MemoryBuffer *MB, std::string &Error) { - OwningPtr<SpecialCaseList> SCL(new SpecialCaseList()); - if (!SCL->parse(MB, Error)) - return 0; - return SCL.take(); -} - -SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) { - std::string Error; - if (SpecialCaseList *SCL = create(Path, Error)) - return SCL; - report_fatal_error(Error); -} - -bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { - // Iterate through each line in the blacklist file. - SmallVector<StringRef, 16> Lines; - SplitString(MB->getBuffer(), Lines, "\n\r"); - StringMap<StringMap<std::string> > Regexps; - assert(Entries.empty() && - "parse() should be called on an empty SpecialCaseList"); - int LineNo = 1; - for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end(); - I != E; ++I, ++LineNo) { - // Ignore empty lines and lines starting with "#" - if (I->empty() || I->startswith("#")) - continue; - // Get our prefix and unparsed regexp. - std::pair<StringRef, StringRef> SplitLine = I->split(":"); - StringRef Prefix = SplitLine.first; - if (SplitLine.second.empty()) { - // Missing ':' in the line. - Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" + - SplitLine.first + "'").str(); - return false; - } - - std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("="); - std::string Regexp = SplitRegexp.first; - StringRef Category = SplitRegexp.second; - - // Backwards compatibility. - if (Prefix == "global-init") { - Prefix = "global"; - Category = "init"; - } else if (Prefix == "global-init-type") { - Prefix = "type"; - Category = "init"; - } else if (Prefix == "global-init-src") { - Prefix = "src"; - Category = "init"; - } - - // See if we can store Regexp in Strings. 
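For context on the factory interface of the file being deleted here: create() returned null and filled Error on failure, and createOrDie() wrapped that in report_fatal_error. Typical usage was roughly (path is illustrative):

    std::string Error;
    SpecialCaseList *SCL = SpecialCaseList::create("blacklist.txt", Error);
    if (!SCL)
      report_fatal_error(Error);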
- if (Regex::isLiteralERE(Regexp)) { - Entries[Prefix][Category].Strings.insert(Regexp); - continue; - } - - // Replace * with .* - for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; - pos += strlen(".*")) { - Regexp.replace(pos, strlen("*"), ".*"); - } - - // Check that the regexp is valid. - Regex CheckRE(Regexp); - std::string REError; - if (!CheckRE.isValid(REError)) { - Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" + - SplitLine.second + "': " + REError).str(); - return false; - } - - // Add this regexp into the proper group by its prefix. - if (!Regexps[Prefix][Category].empty()) - Regexps[Prefix][Category] += "|"; - Regexps[Prefix][Category] += "^" + Regexp + "$"; - } - - // Iterate through each of the prefixes, and create Regexs for them. - for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(), - E = Regexps.end(); - I != E; ++I) { - for (StringMap<std::string>::const_iterator II = I->second.begin(), - IE = I->second.end(); - II != IE; ++II) { - Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); - } - } - return true; -} - -SpecialCaseList::~SpecialCaseList() { - for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(), - E = Entries.end(); - I != E; ++I) { - for (StringMap<Entry>::const_iterator II = I->second.begin(), - IE = I->second.end(); - II != IE; ++II) { - delete II->second.RegEx; - } - } -} - -bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { - return isIn(*F.getParent(), Category) || - inSectionCategory("fun", F.getName(), Category); -} - -static StringRef GetGlobalTypeString(const GlobalValue &G) { - // Types of GlobalVariables are always pointer types. - Type *GType = G.getType()->getElementType(); - // For now we support blacklisting struct types only. 
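The parse() being removed above accepted one entry per line in the form prefix:wildcard-or-regexp with an optional =category suffix, '#' comment lines, and '*' expanded to '.*'. An illustrative list it would have accepted:

    # Don't instrument these:
    fun:*no_sanitize*
    src:third_party/.*
    global:known_racy_global=init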
- if (StructType *SGType = dyn_cast<StructType>(GType)) { - if (!SGType->isLiteral()) - return SGType->getName(); - } - return "<unknown type>"; -} - -bool SpecialCaseList::isIn(const GlobalVariable &G, - const StringRef Category) const { - return isIn(*G.getParent(), Category) || - inSectionCategory("global", G.getName(), Category) || - inSectionCategory("type", GetGlobalTypeString(G), Category); -} - -bool SpecialCaseList::isIn(const GlobalAlias &GA, - const StringRef Category) const { - if (isIn(*GA.getParent(), Category)) - return true; - - if (isa<FunctionType>(GA.getType()->getElementType())) - return inSectionCategory("fun", GA.getName(), Category); - - return inSectionCategory("global", GA.getName(), Category) || - inSectionCategory("type", GetGlobalTypeString(GA), Category); -} - -bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { - return inSectionCategory("src", M.getModuleIdentifier(), Category); -} - -bool SpecialCaseList::inSectionCategory(const StringRef Section, - const StringRef Query, - const StringRef Category) const { - StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); - if (I == Entries.end()) return false; - StringMap<Entry>::const_iterator II = I->second.find(Category); - if (II == I->second.end()) return false; - - return II->getValue().match(Query); -} - -} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 560f581..0c2fc0a 100644 --- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -59,7 +59,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Then unreachable blocks. if (UnreachableBlocks.empty()) { - UnreachableBlock = 0; + UnreachableBlock = nullptr; } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { @@ -77,7 +77,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Now handle return blocks. if (ReturningBlocks.empty()) { - ReturnBlock = 0; + ReturnBlock = nullptr; return false; // No blocks return } else if (ReturningBlocks.size() == 1) { ReturnBlock = ReturningBlocks.front(); // Already has a single return block @@ -91,9 +91,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), "UnifiedReturnBlock", &F); - PHINode *PN = 0; + PHINode *PN = nullptr; if (F.getReturnType()->isVoidTy()) { - ReturnInst::Create(F.getContext(), NULL, NewRetBlock); + ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp index c3df215..ed4f45c 100644 --- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" -#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" +#include "llvm/PassRegistry.h" using namespace llvm; /// initializeTransformUtils - Initialize all passes in the TransformUtils /// library. 
void llvm::initializeTransformUtils(PassRegistry &Registry) { + initializeAddDiscriminatorsPass(Registry); initializeBreakCriticalEdgesPass(Registry); initializeInstNamerPass(Registry); initializeLCSSAPass(Registry); diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 457fc80..0f20e6d 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -71,12 +71,12 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); - if (OP == 0) continue; + if (!OP) continue; Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. if (Mapped_OP == OP || - (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries))) + (Mapped_OP == nullptr && (Flags & RF_IgnoreMissingEntries))) continue; // Ok, at least one operand needs remapping. @@ -84,13 +84,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, Elts.reserve(MD->getNumOperands()); for (i = 0; i != e; ++i) { Value *Op = MD->getOperand(i); - if (Op == 0) - Elts.push_back(0); + if (!Op) + Elts.push_back(nullptr); else { Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. - if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries)) + if (Mapped_Op == nullptr && (Flags & RF_IgnoreMissingEntries)) Mapped_Op = Op; Elts.push_back(Mapped_Op); } @@ -112,8 +112,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Okay, this either must be a constant (which may or may not be mappable) or // is something that is not in the mapping table. Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); - if (C == 0) - return 0; + if (!C) + return nullptr; if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { Function *F = @@ -126,7 +126,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Otherwise, we have some other constant to remap. Start by checking to see // if all operands have an identity remapping. unsigned OpNo = 0, NumOperands = C->getNumOperands(); - Value *Mapped = 0; + Value *Mapped = nullptr; for (; OpNo != NumOperands; ++OpNo) { Value *Op = C->getOperand(OpNo); Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer); @@ -187,7 +187,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer); // If we aren't ignoring missing entries, assert that something happened. - if (V != 0) + if (V) *op = V; else assert((Flags & RF_IgnoreMissingEntries) && @@ -199,7 +199,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags); // If we aren't ignoring missing entries, assert that something happened. - if (V != 0) + if (V) PN->setIncomingBlock(i, cast<BasicBlock>(V)); else assert((Flags & RF_IgnoreMissingEntries) && |
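A hedged sketch of how cloning utilities typically drive the remapping entry points touched above (NewF, OldV, and NewV are hypothetical; flags as in the hunk):

    ValueToValueMapTy VMap;
    VMap[OldV] = NewV;  // mappings seeded by the caller
    for (BasicBlock &BB : *NewF)
      for (Instruction &I : BB)
        RemapInstruction(&I, VMap, RF_IgnoreMissingEntries);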