Diffstat (limited to 'contrib')
518 files changed, 20295 insertions, 7661 deletions
diff --git a/contrib/llvm/include/llvm/ADT/IntEqClasses.h b/contrib/llvm/include/llvm/ADT/IntEqClasses.h index 8e75c48..0baee2f 100644 --- a/contrib/llvm/include/llvm/ADT/IntEqClasses.h +++ b/contrib/llvm/include/llvm/ADT/IntEqClasses.h @@ -53,10 +53,10 @@ public: NumClasses = 0; } - /// join - Join the equivalence classes of a and b. After joining classes, - /// findLeader(a) == findLeader(b). - /// This requires an uncompressed map. - void join(unsigned a, unsigned b); + /// Join the equivalence classes of a and b. After joining classes, + /// findLeader(a) == findLeader(b). This requires an uncompressed map. + /// Returns the new leader. + unsigned join(unsigned a, unsigned b); /// findLeader - Compute the leader of a's equivalence class. This is the /// smallest member of the class. diff --git a/contrib/llvm/include/llvm/ADT/PointerEmbeddedInt.h b/contrib/llvm/include/llvm/ADT/PointerEmbeddedInt.h new file mode 100644 index 0000000..8781d18 --- /dev/null +++ b/contrib/llvm/include/llvm/ADT/PointerEmbeddedInt.h @@ -0,0 +1,103 @@ +//===- llvm/ADT/PointerEmbeddedInt.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_POINTEREMBEDDEDINT_H +#define LLVM_ADT_POINTEREMBEDDEDINT_H + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/PointerLikeTypeTraits.h" +#include <climits> + +namespace llvm { + +/// Utility to embed an integer into a pointer-like type. This is specifically +/// intended to allow embedding integers where fewer bits are required than +/// exist in a pointer, and the integer can participate in abstractions +/// alongside other pointer-like types. For example, it can be placed into a \c +/// PointerSumType or \c PointerUnion. +/// +/// Note that much like pointers, an integer value of zero has special utility +/// due to boolean conversions. For example, a non-null value can be tested for +/// in the above abstractions without testing the particular active member. +/// Also, the default constructed value zero initializes the integer. +template <typename IntT, int Bits = sizeof(IntT) * CHAR_BIT> +class PointerEmbeddedInt { + uintptr_t Value; + + static_assert(Bits < sizeof(uintptr_t) * CHAR_BIT, + "Cannot embed more bits than we have in a pointer!"); + + enum : uintptr_t { + // We shift as many zeros into the value as we can while preserving the + // number of bits desired for the integer. + Shift = sizeof(uintptr_t) * CHAR_BIT - Bits, + + // We also want to be able to mask out the preserved bits for asserts. + Mask = static_cast<uintptr_t>(-1) << Bits + }; + + friend class PointerLikeTypeTraits<PointerEmbeddedInt>; + + explicit PointerEmbeddedInt(uintptr_t Value) : Value(Value) {} + +public: + PointerEmbeddedInt() : Value(0) {} + + PointerEmbeddedInt(IntT I) : Value(static_cast<uintptr_t>(I) << Shift) { + assert((I & Mask) == 0 && "Integer has bits outside those preserved!"); + } + + PointerEmbeddedInt &operator=(IntT I) { + assert((I & Mask) == 0 && "Integer has bits outside those preserved!"); + Value = static_cast<uintptr_t>(I) << Shift; + } + + // Note that this implicit conversion additionally allows all of the basic + // comparison operators to work transparently, etc.
+ operator IntT() const { return static_cast<IntT>(Value >> Shift); } +}; + +// Provide pointer like traits to support use with pointer unions and sum +// types. +template <typename IntT, int Bits> +class PointerLikeTypeTraits<PointerEmbeddedInt<IntT, Bits>> { + typedef PointerEmbeddedInt<IntT, Bits> T; + +public: + static inline void *getAsVoidPointer(const T &P) { + return reinterpret_cast<void *>(P.Value); + } + static inline T getFromVoidPointer(void *P) { + return T(reinterpret_cast<uintptr_t>(P)); + } + static inline T getFromVoidPointer(const void *P) { + return T(reinterpret_cast<uintptr_t>(P)); + } + + enum { NumLowBitsAvailable = T::Shift }; +}; + +// Teach DenseMap how to use PointerEmbeddedInt objects as keys if the Int type +// itself can be a key. +template <typename IntT, int Bits> +struct DenseMapInfo<PointerEmbeddedInt<IntT, Bits>> { + typedef PointerEmbeddedInt<IntT, Bits> T; + + typedef DenseMapInfo<IntT> IntInfo; + + static inline T getEmptyKey() { return IntInfo::getEmptyKey(); } + static inline T getTombstoneKey() { return IntInfo::getTombstoneKey(); } + static unsigned getHashValue(const T &Arg) { + return IntInfo::getHashValue(Arg); + } + static bool isEqual(const T &LHS, const T &RHS) { return LHS == RHS; } +}; +} + +#endif diff --git a/contrib/llvm/include/llvm/ADT/PointerIntPair.h b/contrib/llvm/include/llvm/ADT/PointerIntPair.h index 0058d85..83fbf12 100644 --- a/contrib/llvm/include/llvm/ADT/PointerIntPair.h +++ b/contrib/llvm/include/llvm/ADT/PointerIntPair.h @@ -55,20 +55,25 @@ public: PointerTy getPointer() const { return Info::getPointer(Value); } - IntType getInt() const { return (IntType)Info::getInt(Value); } + IntType getInt() const { + return (IntType)Info::getInt(Value); + } void setPointer(PointerTy PtrVal) { Value = Info::updatePointer(Value, PtrVal); } - void setInt(IntType IntVal) { Value = Info::updateInt(Value, IntVal); } + void setInt(IntType IntVal) { + Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal)); + } void initWithPointer(PointerTy PtrVal) { Value = Info::updatePointer(0, PtrVal); } void setPointerAndInt(PointerTy PtrVal, IntType IntVal) { - Value = Info::updateInt(Info::updatePointer(0, PtrVal), IntVal); + Value = Info::updateInt(Info::updatePointer(0, PtrVal), + static_cast<intptr_t>(IntVal)); } PointerTy const *getAddrOfPointer() const { diff --git a/contrib/llvm/include/llvm/ADT/PointerSumType.h b/contrib/llvm/include/llvm/ADT/PointerSumType.h new file mode 100644 index 0000000..6b8618f --- /dev/null +++ b/contrib/llvm/include/llvm/ADT/PointerSumType.h @@ -0,0 +1,205 @@ +//===- llvm/ADT/PointerSumType.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_POINTERSUMTYPE_H +#define LLVM_ADT_POINTERSUMTYPE_H + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/PointerLikeTypeTraits.h" + +namespace llvm { + +/// A compile time pair of an integer tag and the pointer-like type which it +/// indexes within a sum type. Also allows the user to specify a particular +/// traits class for pointer types with custom behavior such as over-aligned +/// allocation. 
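The two headers added above are designed to compose: PointerEmbeddedInt gives a small integer a pointer-like interface precisely so it can appear as a member of the sum type, via the member template defined just below. A minimal usage sketch, assuming only these two headers; the Kind enum, the Count16 alias, and the bit width are invented for illustration:

#include "llvm/ADT/PointerEmbeddedInt.h"
#include "llvm/ADT/PointerSumType.h"
#include <cassert>

// Hypothetical sum type: either a pointer to an int or a small count
// embedded directly in the value. Both tags must fit in the low bits
// common to all members (2 bits here, from the alignment of int).
enum Kind { K_Ptr = 0, K_Count = 1 };
typedef llvm::PointerEmbeddedInt<unsigned, 16> Count16;
typedef llvm::PointerSumType<
    Kind, llvm::PointerSumTypeMember<K_Ptr, int *>,
    llvm::PointerSumTypeMember<K_Count, Count16>> PtrOrCount;

void example() {
  int X = 42;
  PtrOrCount A = PtrOrCount::create<K_Ptr>(&X);
  PtrOrCount B = PtrOrCount::create<K_Count>(7);
  assert(A.is<K_Ptr>() && *A.get<K_Ptr>() == 42);
  assert(B.is<K_Count>() && B.get<K_Count>() == 7); // implicit IntT conversion
}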
+template <uintptr_t N, typename PointerArgT, + typename TraitsArgT = PointerLikeTypeTraits<PointerArgT>> +struct PointerSumTypeMember { + enum { Tag = N }; + typedef PointerArgT PointerT; + typedef TraitsArgT TraitsT; +}; + +namespace detail { + +template <typename TagT, typename... MemberTs> +struct PointerSumTypeHelper; + +} + +/// A sum type over pointer-like types. +/// +/// This is a normal tagged union across pointer-like types that uses the low +/// bits of the pointers to store the tag. +/// +/// Each member of the sum type is specified by passing a \c +/// PointerSumTypeMember specialization in the variadic member argument list. +/// This allows the user to control the particular tag value associated with +/// a particular type, use the same type for multiple different tags, and +/// customize the pointer-like traits used for a particular member. Note that +/// these *must* be specializations of \c PointerSumTypeMember, no other type +/// will suffice, even if it provides a compatible interface. +/// +/// This type implements all of the comparison operators and even hash table +/// support by comparing the underlying storage of the pointer values. It +/// doesn't support delegating to particular members for comparisons. +/// +/// It also default constructs to a zero tag with a null pointer, whatever that +/// would be. This means that the zero value for the tag type is significant, +/// and it is desirable to assign it to the state you most want a +/// default-constructed value to represent. +/// +/// There is no support for constructing or accessing with a dynamic tag as +/// that would fundamentally violate the type safety provided by the sum type. +template <typename TagT, typename... MemberTs> class PointerSumType { + uintptr_t Value; + + typedef detail::PointerSumTypeHelper<TagT, MemberTs...> HelperT; + +public: + PointerSumType() : Value(0) {} + + /// A typed constructor for a specific tagged member of the sum type. + template <TagT N> + static PointerSumType + create(typename HelperT::template Lookup<N>::PointerT Pointer) { + PointerSumType Result; + void *V = HelperT::template Lookup<N>::TraitsT::getAsVoidPointer(Pointer); + assert((reinterpret_cast<uintptr_t>(V) & HelperT::TagMask) == 0 && + "Pointer is insufficiently aligned to store the discriminant!"); + Result.Value = reinterpret_cast<uintptr_t>(V) | N; + return Result; + } + + TagT getTag() const { return static_cast<TagT>(Value & HelperT::TagMask); } + + template <TagT N> bool is() const { return N == getTag(); } + + template <TagT N> typename HelperT::template Lookup<N>::PointerT get() const { + void *P = is<N>() ?
getImpl() : nullptr; + return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(P); + } + + template <TagT N> + typename HelperT::template Lookup<N>::PointerT cast() const { + assert(is<N>() && "This instance has a different active member."); + return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(getImpl()); + } + + operator bool() const { return Value & HelperT::PointerMask; } + bool operator==(const PointerSumType &R) const { return Value == R.Value; } + bool operator!=(const PointerSumType &R) const { return Value != R.Value; } + bool operator<(const PointerSumType &R) const { return Value < R.Value; } + bool operator>(const PointerSumType &R) const { return Value > R.Value; } + bool operator<=(const PointerSumType &R) const { return Value <= R.Value; } + bool operator>=(const PointerSumType &R) const { return Value >= R.Value; } + + uintptr_t getOpaqueValue() const { return Value; } + +protected: + void *getImpl() const { + return reinterpret_cast<void *>(Value & HelperT::PointerMask); + } +}; + +namespace detail { + +/// A helper template for implementing \c PointerSumType. It provides fast +/// compile-time lookup of the member from a particular tag value, along with +/// useful constants and compile-time checking infrastructure. +template <typename TagT, typename... MemberTs> +struct PointerSumTypeHelper : MemberTs... { + // First we use a trick to allow quickly looking up information about + // a particular member of the sum type. This works because we arranged to + // have this type derive from all of the member type templates. We can select + // the matching member for a tag using type deduction during overload + // resolution. + template <TagT N, typename PointerT, typename TraitsT> + static PointerSumTypeMember<N, PointerT, TraitsT> + LookupOverload(PointerSumTypeMember<N, PointerT, TraitsT> *); + template <TagT N> static void LookupOverload(...); + template <TagT N> struct Lookup { + // Compute a particular member type by resolving the lookup helper overload. + typedef decltype(LookupOverload<N>( + static_cast<PointerSumTypeHelper *>(nullptr))) MemberT; + + /// The Nth member's pointer type. + typedef typename MemberT::PointerT PointerT; + + /// The Nth member's traits type. + typedef typename MemberT::TraitsT TraitsT; + }; + + // Next we need to compute the number of bits available for the discriminant + // by taking the min of the bits available for each member. Much of this + // would be vastly easier with good constexpr support. + template <uintptr_t V, uintptr_t... Vs> + struct Min : std::integral_constant< + uintptr_t, (V < Min<Vs...>::value ? V : Min<Vs...>::value)> { + }; + template <uintptr_t V> + struct Min<V> : std::integral_constant<uintptr_t, V> {}; + enum { NumTagBits = Min<MemberTs::TraitsT::NumLowBitsAvailable...>::value }; + + // Also compute the smallest discriminant and various masks for convenience. + enum : uint64_t { + MinTag = Min<MemberTs::Tag...>::value, + PointerMask = static_cast<uint64_t>(-1) << NumTagBits, + TagMask = ~PointerMask + }; + + // Finally we need a recursive template to do static checks of each + // member. + template <typename MemberT, typename...
InnerMemberTs> + struct Checker : Checker<InnerMemberTs...> { + static_assert(MemberT::Tag < (1 << NumTagBits), + "This discriminant value requires too many bits!"); + }; + template <typename MemberT> struct Checker<MemberT> : std::true_type { + static_assert(MemberT::Tag < (1 << NumTagBits), + "This discriminant value requires too many bits!"); + }; + static_assert(Checker<MemberTs...>::value, + "Each member must pass the checker."); +}; + +} + +// Teach DenseMap how to use PointerSumTypes as keys. +template <typename TagT, typename... MemberTs> +struct DenseMapInfo<PointerSumType<TagT, MemberTs...>> { + typedef PointerSumType<TagT, MemberTs...> SumType; + + typedef detail::PointerSumTypeHelper<TagT, MemberTs...> HelperT; + enum { SomeTag = HelperT::MinTag }; + typedef typename HelperT::template Lookup<HelperT::MinTag>::PointerT + SomePointerT; + typedef DenseMapInfo<SomePointerT> SomePointerInfo; + + static inline SumType getEmptyKey() { + return SumType::create<SomeTag>(SomePointerInfo::getEmptyKey()); + } + static inline SumType getTombstoneKey() { + return SumType::create<SomeTag>( + SomePointerInfo::getTombstoneKey()); + } + static unsigned getHashValue(const SumType &Arg) { + uintptr_t OpaqueValue = Arg.getOpaqueValue(); + return DenseMapInfo<uintptr_t>::getHashValue(OpaqueValue); + } + static bool isEqual(const SumType &LHS, const SumType &RHS) { + return LHS == RHS; + } +}; + +} + +#endif diff --git a/contrib/llvm/include/llvm/ADT/Twine.h b/contrib/llvm/include/llvm/ADT/Twine.h index db0bf4b..81b1a6d 100644 --- a/contrib/llvm/include/llvm/ADT/Twine.h +++ b/contrib/llvm/include/llvm/ADT/Twine.h @@ -101,15 +101,13 @@ namespace llvm { /// A pointer to a SmallString instance. SmallStringKind, - /// A char value reinterpreted as a pointer, to render as a character. + /// A char value, to render as a character. CharKind, - /// An unsigned int value reinterpreted as a pointer, to render as an - /// unsigned decimal integer. + /// An unsigned int value, to render as an unsigned decimal integer. DecUIKind, - /// An int value reinterpreted as a pointer, to render as a signed - /// decimal integer. + /// An int value, to render as a signed decimal integer. DecIKind, /// A pointer to an unsigned long value, to render as an unsigned decimal diff --git a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h index ef3d5e8..e02f3ab 100644 --- a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -104,54 +104,10 @@ class LazyCallGraph { public: class Node; class SCC; + class iterator; typedef SmallVector<PointerUnion<Function *, Node *>, 4> NodeVectorT; typedef SmallVectorImpl<PointerUnion<Function *, Node *>> NodeVectorImplT; - /// A lazy iterator used for both the entry nodes and child nodes. - /// - /// When this iterator is dereferenced, if not yet available, a function will - /// be scanned for "calls" or uses of functions and its child information - /// will be constructed. All of these results are accumulated and cached in - /// the graph. - class iterator - : public iterator_adaptor_base<iterator, NodeVectorImplT::iterator, - std::forward_iterator_tag, Node> { - friend class LazyCallGraph; - friend class LazyCallGraph::Node; - - LazyCallGraph *G; - NodeVectorImplT::iterator E; - - // Build the iterator for a specific position in a node list. 
- iterator(LazyCallGraph &G, NodeVectorImplT::iterator NI, - NodeVectorImplT::iterator E) - : iterator_adaptor_base(NI), G(&G), E(E) { - while (I != E && I->isNull()) - ++I; - } - - public: - iterator() {} - - using iterator_adaptor_base::operator++; - iterator &operator++() { - do { - ++I; - } while (I != E && I->isNull()); - return *this; - } - - reference operator*() const { - if (I->is<Node *>()) - return *I->get<Node *>(); - - Function *F = I->get<Function *>(); - Node &ChildN = G->get(*F); - *I = &ChildN; - return ChildN; - } - }; - /// A node in the call graph. /// /// This represents a single node. Its primary roles are to cache the list of @@ -200,6 +156,51 @@ public: bool operator!=(const Node &N) const { return !operator==(N); } }; + /// A lazy iterator used for both the entry nodes and child nodes. + /// + /// When this iterator is dereferenced, if not yet available, a function will + /// be scanned for "calls" or uses of functions and its child information + /// will be constructed. All of these results are accumulated and cached in + /// the graph. + class iterator + : public iterator_adaptor_base<iterator, NodeVectorImplT::iterator, + std::forward_iterator_tag, Node> { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + LazyCallGraph *G; + NodeVectorImplT::iterator E; + + // Build the iterator for a specific position in a node list. + iterator(LazyCallGraph &G, NodeVectorImplT::iterator NI, + NodeVectorImplT::iterator E) + : iterator_adaptor_base(NI), G(&G), E(E) { + while (I != E && I->isNull()) + ++I; + } + + public: + iterator() {} + + using iterator_adaptor_base::operator++; + iterator &operator++() { + do { + ++I; + } while (I != E && I->isNull()); + return *this; + } + + reference operator*() const { + if (I->is<Node *>()) + return *I->get<Node *>(); + + Function *F = I->get<Function *>(); + Node &ChildN = G->get(*F); + *I = &ChildN; + return ChildN; + } + }; + /// An SCC of the call graph. /// /// This represents a Strongly Connected Component of the call graph as diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm/include/llvm/Analysis/LoopInfo.h index c219bd8..70e636c 100644 --- a/contrib/llvm/include/llvm/Analysis/LoopInfo.h +++ b/contrib/llvm/include/llvm/Analysis/LoopInfo.h @@ -59,38 +59,37 @@ template<class N, class M> class LoopInfoBase; template<class N, class M> class LoopBase; //===----------------------------------------------------------------------===// -/// LoopBase class - Instances of this class are used to represent loops that -/// are detected in the flow graph +/// Instances of this class are used to represent loops that are detected in the +/// flow graph. /// template<class BlockT, class LoopT> class LoopBase { LoopT *ParentLoop; - // SubLoops - Loops contained entirely within this one. + // Loops contained entirely within this one. std::vector<LoopT *> SubLoops; - // Blocks - The list of blocks in this loop. First entry is the header node. + // The list of blocks in this loop. First entry is the header node. std::vector<BlockT*> Blocks; SmallPtrSet<const BlockT*, 8> DenseBlockSet; - /// Indicator that this loops has been "unlooped", so there's no loop here - /// anymore. - bool IsUnloop = false; + /// Indicator that this loop is no longer a valid loop. + bool IsInvalid = false; LoopBase(const LoopBase<BlockT, LoopT> &) = delete; const LoopBase<BlockT, LoopT>& operator=(const LoopBase<BlockT, LoopT> &) = delete; public: - /// Loop ctor - This creates an empty loop. + /// This creates an empty loop.
LoopBase() : ParentLoop(nullptr) {} ~LoopBase() { for (size_t i = 0, e = SubLoops.size(); i != e; ++i) delete SubLoops[i]; } - /// getLoopDepth - Return the nesting level of this loop. An outer-most - /// loop has depth 1, for consistency with loop depth values used for basic - /// blocks, where depth 0 is used for blocks not inside any loops. + /// Return the nesting level of this loop. An outer-most loop has depth 1, + /// for consistency with loop depth values used for basic blocks, where depth + /// 0 is used for blocks not inside any loops. unsigned getLoopDepth() const { unsigned D = 1; for (const LoopT *CurLoop = ParentLoop; CurLoop; @@ -101,33 +100,28 @@ public: BlockT *getHeader() const { return Blocks.front(); } LoopT *getParentLoop() const { return ParentLoop; } - /// setParentLoop is a raw interface for bypassing addChildLoop. + /// This is a raw interface for bypassing addChildLoop. void setParentLoop(LoopT *L) { ParentLoop = L; } - /// contains - Return true if the specified loop is contained within in - /// this loop. - /// + /// Return true if the specified loop is contained within this loop. bool contains(const LoopT *L) const { if (L == this) return true; if (!L) return false; return contains(L->getParentLoop()); } - /// contains - Return true if the specified basic block is in this loop. - /// + /// Return true if the specified basic block is in this loop. bool contains(const BlockT *BB) const { return DenseBlockSet.count(BB); } - /// contains - Return true if the specified instruction is in this loop. - /// + /// Return true if the specified instruction is in this loop. template<class InstT> bool contains(const InstT *Inst) const { return contains(Inst->getParent()); } - /// iterator/begin/end - Return the loops contained entirely within this loop. - /// + /// Return the loops contained entirely within this loop. const std::vector<LoopT *> &getSubLoops() const { return SubLoops; } std::vector<LoopT *> &getSubLoopsVector() { return SubLoops; } typedef typename std::vector<LoopT *>::const_iterator iterator; @@ -139,8 +133,7 @@ public: reverse_iterator rend() const { return SubLoops.rend(); } bool empty() const { return SubLoops.empty(); } - /// getBlocks - Get a list of the basic blocks which make up this loop. - /// + /// Get a list of the basic blocks which make up this loop. const std::vector<BlockT*> &getBlocks() const { return Blocks; } typedef typename std::vector<BlockT*>::const_iterator block_iterator; block_iterator block_begin() const { return Blocks.begin(); } @@ -149,21 +142,19 @@ public: return make_range(block_begin(), block_end()); } - /// getNumBlocks - Get the number of blocks in this loop in constant time. + /// Get the number of blocks in this loop in constant time. unsigned getNumBlocks() const { return Blocks.size(); } - /// Mark this loop as having been unlooped - the last backedge was removed and - /// we no longer have a loop. - void markUnlooped() { IsUnloop = true; } + /// Invalidate the loop, indicating that it is no longer a loop. + void invalidate() { IsInvalid = true; } - /// Return true if this no longer represents a loop. - bool isUnloop() const { return IsUnloop; } + /// Return true if this loop is no longer valid. + bool isInvalid() { return IsInvalid; } - /// isLoopExiting - True if terminator in the block can branch to another - /// block that is outside of the current loop. - /// + /// True if the terminator in the block can branch to another block that is + /// outside of the current loop.
bool isLoopExiting(const BlockT *BB) const { typedef GraphTraits<const BlockT*> BlockTraits; for (typename BlockTraits::ChildIteratorType SI = @@ -175,8 +166,7 @@ public: return false; } - /// getNumBackEdges - Calculate the number of back edges to the loop header - /// + /// Calculate the number of back edges to the loop header. unsigned getNumBackEdges() const { unsigned NumBackEdges = 0; BlockT *H = getHeader(); @@ -199,53 +189,49 @@ public: // induction variable canonicalization pass should be used to normalize loops // for easy analysis. These methods assume canonical loops. - /// getExitingBlocks - Return all blocks inside the loop that have successors - /// outside of the loop. These are the blocks _inside of the current loop_ - /// which branch out. The returned list is always unique. - /// + /// Return all blocks inside the loop that have successors outside of the + /// loop. These are the blocks _inside of the current loop_ which branch out. + /// The returned list is always unique. void getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const; - /// getExitingBlock - If getExitingBlocks would return exactly one block, - /// return that block. Otherwise return null. + /// If getExitingBlocks would return exactly one block, return that block. + /// Otherwise return null. BlockT *getExitingBlock() const; - /// getExitBlocks - Return all of the successor blocks of this loop. These - /// are the blocks _outside of the current loop_ which are branched to. - /// + /// Return all of the successor blocks of this loop. These are the blocks + /// _outside of the current loop_ which are branched to. void getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const; - /// getExitBlock - If getExitBlocks would return exactly one block, - /// return that block. Otherwise return null. + /// If getExitBlocks would return exactly one block, return that block. + /// Otherwise return null. BlockT *getExitBlock() const; /// Edge type. typedef std::pair<const BlockT*, const BlockT*> Edge; - /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_). + /// Return all pairs of (_inside_block_,_outside_block_). void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const; - /// getLoopPreheader - If there is a preheader for this loop, return it. A - /// loop has a preheader if there is only one edge to the header of the loop - /// from outside of the loop. If this is the case, the block branching to the - /// header of the loop is the preheader node. + /// If there is a preheader for this loop, return it. A loop has a preheader + /// if there is only one edge to the header of the loop from outside of the + /// loop. If this is the case, the block branching to the header of the loop + /// is the preheader node. /// /// This method returns null if there is no preheader for the loop. - /// BlockT *getLoopPreheader() const; - /// getLoopPredecessor - If the given loop's header has exactly one unique - /// predecessor outside the loop, return it. Otherwise return null. - /// This is less strict that the loop "preheader" concept, which requires + /// If the given loop's header has exactly one unique predecessor outside the + /// loop, return it. Otherwise return null. + /// This is less strict than the loop "preheader" concept, which requires /// the predecessor to have exactly one successor. - /// BlockT *getLoopPredecessor() const; - /// getLoopLatch - If there is a single latch block for this loop, return it. + /// If there is a single latch block for this loop, return it.
/// A latch block is a block that contains a branch back to the header. BlockT *getLoopLatch() const; - /// getLoopLatches - Return all loop latch blocks of this loop. A latch block - /// is a block that contains a branch back to the header. + /// Return all loop latch blocks of this loop. A latch block is a block that + /// contains a branch back to the header. void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const { BlockT *H = getHeader(); typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; @@ -260,32 +246,29 @@ public: // APIs for updating loop information after changing the CFG // - /// addBasicBlockToLoop - This method is used by other analyses to update loop - /// information. NewBB is set to be a new member of the current loop. + /// This method is used by other analyses to update loop information. + /// NewBB is set to be a new member of the current loop. /// Because of this, it is added as a member of all parent loops, and is added /// to the specified LoopInfo object as being in the current basic block. It /// is not valid to replace the loop header with this method. - /// void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LI); - /// replaceChildLoopWith - This is used when splitting loops up. It replaces - /// the OldChild entry in our children list with NewChild, and updates the - /// parent pointer of OldChild to be null and the NewChild to be this loop. + /// This is used when splitting loops up. It replaces the OldChild entry in + /// our children list with NewChild, and updates the parent pointer of + /// OldChild to be null and the NewChild to be this loop. /// This updates the loop depth of the new child. void replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild); - /// addChildLoop - Add the specified loop to be a child of this loop. This - /// updates the loop depth of the new child. - /// + /// Add the specified loop to be a child of this loop. + /// This updates the loop depth of the new child. void addChildLoop(LoopT *NewChild) { assert(!NewChild->ParentLoop && "NewChild already has a parent!"); NewChild->ParentLoop = static_cast<LoopT *>(this); SubLoops.push_back(NewChild); } - /// removeChildLoop - This removes the specified child from being a subloop of - /// this loop. The loop is not deleted, as it will presumably be inserted - /// into another loop. + /// This removes the specified child from being a subloop of this loop. The + /// loop is not deleted, as it will presumably be inserted into another loop. LoopT *removeChildLoop(iterator I) { assert(I != SubLoops.end() && "Cannot remove end iterator!"); LoopT *Child = *I; @@ -295,7 +278,7 @@ public: return Child; } - /// addBlockEntry - This adds a basic block directly to the basic block list. + /// This adds a basic block directly to the basic block list. /// This should only be used by transformations that create new loops. Other /// transformations should use addBasicBlockToLoop. 
void addBlockEntry(BlockT *BB) { @@ -303,19 +286,18 @@ public: DenseBlockSet.insert(BB); } - /// reverseBlocks - interface to reverse Blocks[from, end of loop] in this loop + /// interface to reverse Blocks[from, end of loop] in this loop void reverseBlock(unsigned from) { std::reverse(Blocks.begin() + from, Blocks.end()); } - /// reserveBlocks- interface to do reserve() for Blocks + /// interface to do reserve() for Blocks void reserveBlocks(unsigned size) { Blocks.reserve(size); } - /// moveToHeader - This method is used to move BB (which must be part of this - /// loop) to be the loop header of the loop (the block that dominates all - /// others). + /// This method is used to move BB (which must be part of this loop) to be the + /// loop header of the loop (the block that dominates all others). void moveToHeader(BlockT *BB) { if (Blocks[0] == BB) return; for (unsigned i = 0; ; ++i) { @@ -328,9 +310,9 @@ public: } } - /// removeBlockFromLoop - This removes the specified basic block from the - /// current loop, updating the Blocks as appropriate. This does not update - /// the mapping in the LoopInfo class. + /// This removes the specified basic block from the current loop, updating the + /// Blocks as appropriate. This does not update the mapping in the LoopInfo + /// class. void removeBlockFromLoop(BlockT *BB) { auto I = std::find(Blocks.begin(), Blocks.end(), BB); assert(I != Blocks.end() && "N is not in this list!"); @@ -339,10 +321,10 @@ public: DenseBlockSet.erase(BB); } - /// verifyLoop - Verify loop structure + /// Verify loop structure void verifyLoop() const; - /// verifyLoop - Verify loop structure of this loop and all nested loops. + /// Verify loop structure of this loop and all nested loops. void verifyLoopNest(DenseSet<const LoopT*> *Loops) const; void print(raw_ostream &OS, unsigned Depth = 0) const; @@ -368,28 +350,26 @@ class Loop : public LoopBase<BasicBlock, Loop> { public: Loop() {} - /// isLoopInvariant - Return true if the specified value is loop invariant - /// + /// Return true if the specified value is loop invariant. bool isLoopInvariant(const Value *V) const; - /// hasLoopInvariantOperands - Return true if all the operands of the - /// specified instruction are loop invariant. + /// Return true if all the operands of the specified instruction are loop + /// invariant. bool hasLoopInvariantOperands(const Instruction *I) const; - /// makeLoopInvariant - If the given value is an instruction inside of the - /// loop and it can be hoisted, do so to make it trivially loop-invariant. + /// If the given value is an instruction inside of the loop and it can be + /// hoisted, do so to make it trivially loop-invariant. /// Return true if the value after any hoisting is loop invariant. This /// function can be used as a slightly more aggressive replacement for /// isLoopInvariant. /// /// If InsertPt is specified, it is the point to hoist instructions to. /// If null, the terminator of the loop preheader is used. - /// bool makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt = nullptr) const; - /// makeLoopInvariant - If the given instruction is inside of the - /// loop and it can be hoisted, do so to make it trivially loop-invariant. + /// If the given instruction is inside of the loop and it can be hoisted, do + /// so to make it trivially loop-invariant. /// Return true if the instruction after any hoisting is loop invariant. This /// function can be used as a slightly more aggressive replacement for /// isLoopInvariant. 
@@ -400,28 +380,26 @@ public: bool makeLoopInvariant(Instruction *I, bool &Changed, Instruction *InsertPt = nullptr) const; - /// getCanonicalInductionVariable - Check to see if the loop has a canonical - /// induction variable: an integer recurrence that starts at 0 and increments - /// by one each time through the loop. If so, return the phi node that - /// corresponds to it. + /// Check to see if the loop has a canonical induction variable: an integer + /// recurrence that starts at 0 and increments by one each time through the + /// loop. If so, return the phi node that corresponds to it. /// /// The IndVarSimplify pass transforms loops to have a canonical induction /// variable. /// PHINode *getCanonicalInductionVariable() const; - /// isLCSSAForm - Return true if the Loop is in LCSSA form + /// Return true if the Loop is in LCSSA form. bool isLCSSAForm(DominatorTree &DT) const; - /// \brief Return true if this Loop and all inner subloops are in LCSSA form. + /// Return true if this Loop and all inner subloops are in LCSSA form. bool isRecursivelyLCSSAForm(DominatorTree &DT) const; - /// isLoopSimplifyForm - Return true if the Loop is in the form that - /// the LoopSimplify form transforms loops to, which is sometimes called - /// normal form. + /// Return true if the Loop is in the form that the LoopSimplify form + /// transforms loops to, which is sometimes called normal form. bool isLoopSimplifyForm() const; - /// isSafeToClone - Return true if the loop body is safe to clone in practice. + /// Return true if the loop body is safe to clone in practice. bool isSafeToClone() const; /// Returns true if the loop is annotated parallel. @@ -454,23 +432,22 @@ public: /// operand should be the node itself. void setLoopID(MDNode *LoopID) const; - /// hasDedicatedExits - Return true if no exit block for the loop - /// has a predecessor that is outside the loop. + /// Return true if no exit block for the loop has a predecessor that is + /// outside the loop. bool hasDedicatedExits() const; - /// getUniqueExitBlocks - Return all unique successor blocks of this loop. + /// Return all unique successor blocks of this loop. /// These are the blocks _outside of the current loop_ which are branched to. /// This assumes that loop exits are in canonical form. - /// void getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const; - /// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one - /// block, return that block. Otherwise return null. + /// If getUniqueExitBlocks would return exactly one block, return that block. + /// Otherwise return null. BasicBlock *getUniqueExitBlock() const; void dump() const; - /// \brief Return the debug location of the start of this loop. + /// Return the debug location of the start of this loop. /// This looks for a BB terminating instruction with a known debug /// location by looking at the preheader and header blocks. If it /// cannot find a terminating instruction with location information, @@ -498,7 +475,7 @@ private: }; //===----------------------------------------------------------------------===// -/// LoopInfo - This class builds and contains all of the top level loop +/// This class builds and contains all of the top-level loop /// structures in the specified function.
/// @@ -507,6 +484,8 @@ class LoopInfoBase { // BBMap - Mapping of basic blocks to the innermost loop they occur in DenseMap<const BlockT *, LoopT *> BBMap; std::vector<LoopT *> TopLevelLoops; + std::vector<LoopT *> RemovedLoops; + friend class LoopBase<BlockT, LoopT>; friend class LoopInfo; @@ -538,6 +517,9 @@ public: for (auto *L : TopLevelLoops) delete L; TopLevelLoops.clear(); + for (auto *L : RemovedLoops) + delete L; + RemovedLoops.clear(); } /// iterator/begin/end - The interface to the top-level loops in the current @@ -552,33 +534,30 @@ public: reverse_iterator rend() const { return TopLevelLoops.rend(); } bool empty() const { return TopLevelLoops.empty(); } - /// getLoopFor - Return the inner most loop that BB lives in. If a basic - /// block is in no loop (for example the entry node), null is returned. - /// + /// Return the innermost loop that BB lives in. If a basic block is in no + /// loop (for example the entry node), null is returned. LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); } - /// operator[] - same as getLoopFor... - /// + /// Same as getLoopFor. const LoopT *operator[](const BlockT *BB) const { return getLoopFor(BB); } - /// getLoopDepth - Return the loop nesting level of the specified block. A - /// depth of 0 means the block is not inside any loop. - /// + /// Return the loop nesting level of the specified block. A depth of 0 means + /// the block is not inside any loop. unsigned getLoopDepth(const BlockT *BB) const { const LoopT *L = getLoopFor(BB); return L ? L->getLoopDepth() : 0; } - // isLoopHeader - True if the block is a loop header node + // True if the block is a loop header node. bool isLoopHeader(const BlockT *BB) const { const LoopT *L = getLoopFor(BB); return L && L->getHeader() == BB; } - /// removeLoop - This removes the specified top-level loop from this loop info - /// object. The loop is not deleted, as it will presumably be inserted into + /// This removes the specified top-level loop from this loop info object. + /// The loop is not deleted, as it will presumably be inserted into /// another loop. LoopT *removeLoop(iterator I) { assert(I != end() && "Cannot remove end iterator!"); @@ -588,9 +567,9 @@ public: return L; } - /// changeLoopFor - Change the top-level loop that contains BB to the - /// specified loop. This should be used by transformations that restructure - /// the loop hierarchy tree. + /// Change the top-level loop that contains BB to the specified loop. + /// This should be used by transformations that restructure the loop hierarchy + /// tree. void changeLoopFor(BlockT *BB, LoopT *L) { if (!L) { BBMap.erase(BB); @@ -599,8 +578,8 @@ public: BBMap[BB] = L; } - /// changeTopLevelLoop - Replace the specified loop in the top-level loops - /// list with the indicated loop. + /// Replace the specified loop in the top-level loops list with the indicated + /// loop. void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop) { auto I = std::find(TopLevelLoops.begin(), TopLevelLoops.end(), OldLoop); @@ -610,14 +589,13 @@ public: "Loops already embedded into a subloop!"); } - /// addTopLevelLoop - This adds the specified loop to the collection of - /// top-level loops. + /// This adds the specified loop to the collection of top-level loops.
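The mutation hooks continue below; on the query side, here is a sketch of how the accessors above are typically driven (the helper function is hypothetical, the API names are real):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"

// Report the loop nesting of every block in F; depth 0 means the block is
// outside all loops, and headers are flagged explicitly.
void printLoopNesting(llvm::Function &F, llvm::LoopInfo &LI) {
  for (llvm::BasicBlock &BB : F) {
    unsigned Depth = LI.getLoopDepth(&BB);
    if (LI.isLoopHeader(&BB))
      llvm::errs() << BB.getName() << ": loop header at depth " << Depth << "\n";
    else if (llvm::Loop *L = LI.getLoopFor(&BB))
      llvm::errs() << BB.getName() << ": in loop with header "
                   << L->getHeader()->getName() << ", depth " << Depth << "\n";
  }
}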
void addTopLevelLoop(LoopT *New) { assert(!New->getParentLoop() && "Loop already in subloop!"); TopLevelLoops.push_back(New); } - /// removeBlock - This method completely removes BB from all data structures, + /// This method completely removes BB from all data structures, /// including all of the Loop objects it is nested in and our mapping from /// BasicBlocks to loops. void removeBlock(BlockT *BB) { @@ -670,15 +648,14 @@ public: // Most of the public interface is provided via LoopInfoBase. - /// updateUnloop - Update LoopInfo after removing the last backedge from a - /// loop--now the "unloop". This updates the loop forest and parent loops for - /// each block so that Unloop is no longer referenced, but does not actually - /// delete the Unloop object. Generally, the loop pass manager should manage - /// deleting the Unloop. - void updateUnloop(Loop *Unloop); + /// Update LoopInfo after removing the last backedge from a loop. This updates + /// the loop forest and parent loops for each block so that \c L is no longer + /// referenced, but does not actually delete \c L immediately. The pointer + /// will remain valid until this LoopInfo's memory is released. + void markAsRemoved(Loop *L); - /// replacementPreservesLCSSAForm - Returns true if replacing From with To - /// everywhere is guaranteed to preserve LCSSA form. + /// Returns true if replacing From with To everywhere is guaranteed to + /// preserve LCSSA form. bool replacementPreservesLCSSAForm(Instruction *From, Value *To) { // Preserving LCSSA form is only problematic if the replacing value is an // instruction. @@ -698,8 +675,7 @@ public: return ToLoop->contains(getLoopFor(From->getParent())); } - /// \brief Checks if moving a specific instruction can break LCSSA in any - /// loop. + /// Checks if moving a specific instruction can break LCSSA in any loop. /// /// Return true if moving \p Inst to before \p NewLoc will break LCSSA, /// assuming that the function containing \p Inst and \p NewLoc is currently diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h index f5e778b..cf29fc9 100644 --- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -259,7 +259,7 @@ public: void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const; /// Lower the specified LLVM Constant to an MCExpr. - const MCExpr *lowerConstant(const Constant *CV); + virtual const MCExpr *lowerConstant(const Constant *CV); /// \brief Print a general LLVM constant to the .s file. void EmitGlobalConstant(const DataLayout &DL, const Constant *CV); diff --git a/contrib/llvm/include/llvm/CodeGen/DIE.h b/contrib/llvm/include/llvm/CodeGen/DIE.h index fa612d9..72b3adc 100644 --- a/contrib/llvm/include/llvm/CodeGen/DIE.h +++ b/contrib/llvm/include/llvm/CodeGen/DIE.h @@ -29,6 +29,48 @@ class MCSymbol; class raw_ostream; class DwarfTypeUnit; +// AsmStreamerBase - A base abstract interface class that defines methods which +// can be implemented either to stream objects or to calculate the size of +// the streamed objects. +// The derived classes will use an AsmPrinter to implement the methods. +// +// TODO: complete this interface and use it to merge EmitValue and SizeOf +// methods in the DIE classes below.
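That TODO is easier to picture with a usage sketch: the same emit calls can be issued against either of the two implementations declared next, once to measure and once to stream. The driver below is hypothetical, and note that per the comment in the next hunk, EmittingAsmStreamer's return value is not the streamed size:

// 'AP' is whatever AsmPrinter the caller already holds.
unsigned emitWithKnownSize(const llvm::AsmPrinter *AP, uint64_t V) {
  llvm::SizeReporterAsmStreamer Sizer(AP);
  unsigned Size = Sizer.emitULEB128(V); // measures, emits nothing

  llvm::EmittingAsmStreamer Emitter(AP);
  Emitter.emitULEB128(V);               // streams the bytes for real
  return Size;
}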
+class AsmStreamerBase { +protected: + const AsmPrinter *AP; + AsmStreamerBase(const AsmPrinter *AP) : AP(AP) {} + +public: + virtual ~AsmStreamerBase() {} + virtual unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr, + unsigned PadTo = 0) = 0; + virtual unsigned emitInt8(unsigned char Value) = 0; + virtual unsigned emitBytes(StringRef Data) = 0; +}; + +/// EmittingAsmStreamer - Implements AsmStreamerBase to stream objects. +/// Notice that the return value is not the actual size of the streamed object. +/// For size calculation use SizeReporterAsmStreamer. +class EmittingAsmStreamer : public AsmStreamerBase { +public: + EmittingAsmStreamer(const AsmPrinter *AP) : AsmStreamerBase(AP) {} + unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr, + unsigned PadTo = 0) override; + unsigned emitInt8(unsigned char Value) override; + unsigned emitBytes(StringRef Data) override; +}; + +/// SizeReporterAsmStreamer - Only reports the size of the streamed objects. +class SizeReporterAsmStreamer : public AsmStreamerBase { +public: + SizeReporterAsmStreamer(const AsmPrinter *AP) : AsmStreamerBase(AP) {} + unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr, + unsigned PadTo = 0) override; + unsigned emitInt8(unsigned char Value) override; + unsigned emitBytes(StringRef Data) override; +}; + //===--------------------------------------------------------------------===// /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a /// Dwarf abbreviation. diff --git a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h index 0157bf9..edade31 100644 --- a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h @@ -848,9 +848,9 @@ namespace llvm { public: explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : LIS(lis) {} - /// Classify - Classify the values in LI into connected components. - /// Return the number of connected components. - unsigned Classify(const LiveInterval *LI); + /// Classify the values in \p LR into connected components. + /// Returns the number of connected components. + unsigned Classify(const LiveRange &LR); /// getEqClass - Classify creates equivalence classes numbered 0..N. Return /// the equivalence class assigned the VNI. diff --git a/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h b/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h index 987634f..9bbdf3e 100644 --- a/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h +++ b/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h @@ -141,6 +141,28 @@ public: LLVM_DUMP_METHOD void dump(const TargetRegisterInfo &TRI) const; }; +/// List of registers defined and used by a machine instruction. +class RegisterOperands { +public: + /// List of virtual registers and register units read by the instruction. + SmallVector<unsigned, 8> Uses; + /// \brief List of virtual registers and register units defined by the + /// instruction which are not dead. + SmallVector<unsigned, 8> Defs; + /// \brief List of virtual registers and register units defined by the + /// instruction but dead. + SmallVector<unsigned, 8> DeadDefs; + + /// Analyze the given instruction \p MI and fill in the Uses, Defs and + /// DeadDefs list based on the MachineOperand flags.
+ void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, bool IgnoreDead = false); + + /// Use liveness information to find dead defs not marked with a dead flag + /// and move them to the DeadDefs vector. + void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); +}; + /// Array of PressureDiffs. class PressureDiffs { PressureDiff *PDiffArray; @@ -161,6 +183,10 @@ public: const PressureDiff &operator[](unsigned Idx) const { return const_cast<PressureDiffs*>(this)->operator[](Idx); } + /// \brief Record pressure difference induced by the given operand list to + /// node with index \p Idx. + void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, + const MachineRegisterInfo &MRI); }; /// Store the effects of a change in pressure on things that MI scheduler cares @@ -329,8 +355,17 @@ public: void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; } /// Recede across the previous instruction. - void recede(SmallVectorImpl<unsigned> *LiveUses = nullptr, - PressureDiff *PDiff = nullptr); + void recede(SmallVectorImpl<unsigned> *LiveUses = nullptr); + + /// Recede across the previous instruction. + /// This "low-level" variant assumes that recedeSkipDebugValues() was + /// called previously and takes precomputed RegisterOperands for the + /// instruction. + void recede(const RegisterOperands &RegOpers, + SmallVectorImpl<unsigned> *LiveUses = nullptr); + + /// Recede until we find an instruction which is not a DebugValue. + void recedeSkipDebugValues(); /// Advance across the current instruction. void advance(); diff --git a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h index f6ad7a8..46c1029 100644 --- a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h @@ -93,8 +93,6 @@ struct WinEHFuncInfo { DenseMap<const Instruction *, int> EHPadStateMap; DenseMap<const FuncletPadInst *, int> FuncletBaseStateMap; DenseMap<const InvokeInst *, int> InvokeStateMap; - DenseMap<const CatchReturnInst *, const BasicBlock *> - CatchRetSuccessorColorMap; DenseMap<MCSymbol *, std::pair<int, MCSymbol *>> LabelToStateMap; SmallVector<CxxUnwindMapEntry, 4> CxxUnwindMap; SmallVector<WinEHTryBlockMapEntry, 4> TryBlockMap; @@ -125,8 +123,5 @@ void calculateSEHStateNumbers(const Function *ParentFn, WinEHFuncInfo &FuncInfo); void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo); - -void calculateCatchReturnSuccessorColors(const Function *Fn, - WinEHFuncInfo &FuncInfo); } #endif // LLVM_CODEGEN_WINEHFUNCINFO_H diff --git a/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h index 0703fb1..3098199 100644 --- a/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h +++ b/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h @@ -28,13 +28,16 @@ class DIPrinter { raw_ostream &OS; bool PrintFunctionNames; bool PrintPretty; - void printName(const DILineInfo &Info, bool Inlined); + int PrintSourceContext; + + void print(const DILineInfo &Info, bool Inlined); + void printContext(std::string FileName, int64_t Line); public: DIPrinter(raw_ostream &OS, bool PrintFunctionNames = true, - bool PrintPretty = false) + bool PrintPretty = false, int PrintSourceContext = 0) : OS(OS), PrintFunctionNames(PrintFunctionNames), - PrintPretty(PrintPretty) {} + PrintPretty(PrintPretty), PrintSourceContext(PrintSourceContext) {} DIPrinter &operator<<(const 
DILineInfo &Info); DIPrinter &operator<<(const DIInliningInfo &Info); diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 7dab5d1..84af472 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -19,7 +19,6 @@ #include "LambdaResolver.h" #include "LogicalDylib.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Transforms/Utils/Cloning.h" #include <list> #include <memory> @@ -61,31 +60,36 @@ private: typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT; - class ModuleOwner { + // Provide type-erasure for the Modules and MemoryManagers. + template <typename ResourceT> + class ResourceOwner { public: - ModuleOwner() = default; - ModuleOwner(const ModuleOwner&) = delete; - ModuleOwner& operator=(const ModuleOwner&) = delete; - virtual ~ModuleOwner() { } - virtual Module& getModule() const = 0; + ResourceOwner() = default; + ResourceOwner(const ResourceOwner&) = delete; + ResourceOwner& operator=(const ResourceOwner&) = delete; + virtual ~ResourceOwner() { } + virtual ResourceT& getResource() const = 0; }; - template <typename ModulePtrT> - class ModuleOwnerImpl : public ModuleOwner { + template <typename ResourceT, typename ResourcePtrT> + class ResourceOwnerImpl : public ResourceOwner<ResourceT> { public: - ModuleOwnerImpl(ModulePtrT ModulePtr) : ModulePtr(std::move(ModulePtr)) {} - Module& getModule() const override { return *ModulePtr; } + ResourceOwnerImpl(ResourcePtrT ResourcePtr) + : ResourcePtr(std::move(ResourcePtr)) {} + ResourceT& getResource() const override { return *ResourcePtr; } private: - ModulePtrT ModulePtr; + ResourcePtrT ResourcePtr; }; - template <typename ModulePtrT> - std::unique_ptr<ModuleOwner> wrapOwnership(ModulePtrT ModulePtr) { - return llvm::make_unique<ModuleOwnerImpl<ModulePtrT>>(std::move(ModulePtr)); + template <typename ResourceT, typename ResourcePtrT> + std::unique_ptr<ResourceOwner<ResourceT>> + wrapOwnership(ResourcePtrT ResourcePtr) { + typedef ResourceOwnerImpl<ResourceT, ResourcePtrT> RO; + return llvm::make_unique<RO>(std::move(ResourcePtr)); } struct LogicalModuleResources { - std::unique_ptr<ModuleOwner> SourceModuleOwner; + std::unique_ptr<ResourceOwner<Module>> SourceModule; std::set<const Function*> StubsToClone; std::unique_ptr<IndirectStubsMgrT> StubsMgr; @@ -93,15 +97,16 @@ private: // Explicit move constructor to make MSVC happy. LogicalModuleResources(LogicalModuleResources &&Other) - : SourceModuleOwner(std::move(Other.SourceModuleOwner)), + : SourceModule(std::move(Other.SourceModule)), StubsToClone(std::move(Other.StubsToClone)), StubsMgr(std::move(Other.StubsMgr)) {} // Explicit move assignment to make MSVC happy. 
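That move-assignment operator continues in the next hunk. Stepping back, the ResourceOwner refactor above is ordinary ownership type-erasure, generalized from the old ModuleOwner so one wrapper can hold modules and memory managers alike. A standalone restatement of the idiom, with invented names (OwnerBase, OwnerImpl, and wrap are not part of the patch):

#include <memory>
#include <utility>

// Hide any owning pointer type (unique_ptr, shared_ptr, ...) behind one
// interface that only hands out the resource by reference.
template <typename ResourceT> class OwnerBase {
public:
  virtual ~OwnerBase() = default;
  virtual ResourceT &get() const = 0;
};

template <typename ResourceT, typename PtrT>
class OwnerImpl : public OwnerBase<ResourceT> {
  PtrT Ptr;

public:
  OwnerImpl(PtrT P) : Ptr(std::move(P)) {}
  ResourceT &get() const override { return *Ptr; }
};

template <typename ResourceT, typename PtrT>
std::unique_ptr<OwnerBase<ResourceT>> wrap(PtrT P) {
  return std::unique_ptr<OwnerBase<ResourceT>>(
      new OwnerImpl<ResourceT, PtrT>(std::move(P)));
}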
LogicalModuleResources& operator=(LogicalModuleResources &&Other) { - SourceModuleOwner = std::move(Other.SourceModuleOwner); + SourceModule = std::move(Other.SourceModule); StubsToClone = std::move(Other.StubsToClone); StubsMgr = std::move(Other.StubsMgr); + return *this; } JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { @@ -114,12 +119,35 @@ private: }; - - struct LogicalDylibResources { typedef std::function<RuntimeDyld::SymbolInfo(const std::string&)> SymbolResolverFtor; + + typedef std::function<typename BaseLayerT::ModuleSetHandleT( + BaseLayerT&, + std::unique_ptr<Module>, + std::unique_ptr<RuntimeDyld::SymbolResolver>)> + ModuleAdderFtor; + + LogicalDylibResources() = default; + + // Explicit move constructor to make MSVC happy. + LogicalDylibResources(LogicalDylibResources &&Other) + : ExternalSymbolResolver(std::move(Other.ExternalSymbolResolver)), + MemMgr(std::move(Other.MemMgr)), + ModuleAdder(std::move(Other.ModuleAdder)) {} + + // Explicit move assignment operator to make MSVC happy. + LogicalDylibResources& operator=(LogicalDylibResources &&Other) { + ExternalSymbolResolver = std::move(Other.ExternalSymbolResolver); + MemMgr = std::move(Other.MemMgr); + ModuleAdder = std::move(Other.ModuleAdder); + return *this; + } + SymbolResolverFtor ExternalSymbolResolver; + std::unique_ptr<ResourceOwner<RuntimeDyld::MemoryManager>> MemMgr; + ModuleAdderFtor ModuleAdder; }; typedef LogicalDylib<BaseLayerT, LogicalModuleResources, @@ -157,9 +185,6 @@ public: MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver) { - assert(MemMgr == nullptr && - "User supplied memory managers not supported with COD yet."); - LogicalDylibs.push_back(CODLogicalDylib(BaseLayer)); auto &LDResources = LogicalDylibs.back().getDylibResources(); @@ -168,6 +193,18 @@ public: return Resolver->findSymbol(Name); }; + auto &MemMgrRef = *MemMgr; + LDResources.MemMgr = + wrapOwnership<RuntimeDyld::MemoryManager>(std::move(MemMgr)); + + LDResources.ModuleAdder = + [&MemMgrRef](BaseLayerT &B, std::unique_ptr<Module> M, + std::unique_ptr<RuntimeDyld::SymbolResolver> R) { + std::vector<std::unique_ptr<Module>> Ms; + Ms.push_back(std::move(M)); + return B.addModuleSet(std::move(Ms), &MemMgrRef, std::move(R)); + }; + // Process each of the modules in this module set. for (auto &M : Ms) addLogicalModule(LogicalDylibs.back(), std::move(M)); @@ -215,9 +252,9 @@ private: auto LMH = LD.createLogicalModule(); auto &LMResources = LD.getLogicalModuleResources(LMH); - LMResources.SourceModuleOwner = wrapOwnership(std::move(SrcMPtr)); + LMResources.SourceModule = wrapOwnership<Module>(std::move(SrcMPtr)); - Module &SrcM = LMResources.SourceModuleOwner->getModule(); + Module &SrcM = LMResources.SourceModule->getResource(); // Create the GlobalValues module. const DataLayout &DL = SrcM.getDataLayout(); @@ -326,12 +363,9 @@ private: return RuntimeDyld::SymbolInfo(nullptr); }); - std::vector<std::unique_ptr<Module>> GVsMSet; - GVsMSet.push_back(std::move(GVsM)); auto GVsH = - BaseLayer.addModuleSet(std::move(GVsMSet), - llvm::make_unique<SectionMemoryManager>(), - std::move(GVsResolver)); + LD.getDylibResources().ModuleAdder(BaseLayer, std::move(GVsM), + std::move(GVsResolver)); LD.addToLogicalModule(LMH, GVsH); } @@ -348,7 +382,7 @@ private: LogicalModuleHandle LMH, Function &F) { auto &LMResources = LD.getLogicalModuleResources(LMH); - Module &SrcM = LMResources.SourceModuleOwner->getModule(); + Module &SrcM = LMResources.SourceModule->getResource(); // If F is a declaration we must already have compiled it. 
if (F.isDeclaration()) @@ -386,7 +420,7 @@ private: LogicalModuleHandle LMH, const PartitionT &Part) { auto &LMResources = LD.getLogicalModuleResources(LMH); - Module &SrcM = LMResources.SourceModuleOwner->getModule(); + Module &SrcM = LMResources.SourceModule->getResource(); // Create the module. std::string NewName = SrcM.getName(); @@ -445,7 +479,6 @@ private: moveFunctionBody(*F, VMap, &Materializer); // Create memory manager and symbol resolver. - auto MemMgr = llvm::make_unique<SectionMemoryManager>(); auto Resolver = createLambdaResolver( [this, &LD, LMH](const std::string &Name) { if (auto Symbol = LD.findSymbolInternally(LMH, Name)) @@ -459,10 +492,9 @@ private: Symbol.getFlags()); return RuntimeDyld::SymbolInfo(nullptr); }); - std::vector<std::unique_ptr<Module>> PartMSet; - PartMSet.push_back(std::move(M)); - return BaseLayer.addModuleSet(std::move(PartMSet), std::move(MemMgr), - std::move(Resolver)); + + return LD.getDylibResources().ModuleAdder(BaseLayer, std::move(M), + std::move(Resolver)); } BaseLayerT &BaseLayer; diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index d6ee3a8..e17630f 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -22,6 +22,7 @@ #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Support/Process.h" #include <sstream> namespace llvm { @@ -179,14 +180,15 @@ private: std::error_code EC; auto TrampolineBlock = sys::OwningMemoryBlock( - sys::Memory::allocateMappedMemory(TargetT::PageSize, nullptr, + sys::Memory::allocateMappedMemory(sys::Process::getPageSize(), nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); assert(!EC && "Failed to allocate trampoline block"); unsigned NumTrampolines = - (TargetT::PageSize - TargetT::PointerSize) / TargetT::TrampolineSize; + (sys::Process::getPageSize() - TargetT::PointerSize) / + TargetT::TrampolineSize; uint8_t *TrampolineMem = static_cast<uint8_t*>(TrampolineBlock.base()); TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(), @@ -240,8 +242,8 @@ private: virtual void anchor(); }; -/// @brief IndirectStubsManager implementation for a concrete target, e.g. -/// OrcX86_64. (See OrcTargetSupport.h). +/// @brief IndirectStubsManager implementation for the host architecture, e.g. +/// OrcX86_64. (See OrcArchitectureSupport.h). 
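Before that manager's definition, one detail of the trampoline pool above is worth making concrete: replacing TargetT::PageSize with sys::Process::getPageSize() turns the trampoline count into a run-time quantity. With the OrcX86_64 constants from the renamed header below and an assumed 4096-byte page, the arithmetic is:

// Assumptions: 4 KiB pages; one resolver pointer (8 bytes) is reserved in
// the block and the rest is filled with 8-byte trampolines.
unsigned PageSize = 4096;                      // sys::Process::getPageSize()
unsigned PointerSize = 8, TrampolineSize = 8;  // OrcX86_64 constants
unsigned NumTrampolines =
    (PageSize - PointerSize) / TrampolineSize; // (4096 - 8) / 8 = 511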
template <typename TargetT> class LocalIndirectStubsManager : public IndirectStubsManager { public: diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 2acfecf..4dc48f1 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -108,9 +108,7 @@ private: void Finalize() override { State = Finalizing; - RTDyld->resolveRelocations(); - RTDyld->registerEHFrames(); - MemMgr->finalizeMemory(); + RTDyld->finalizeWithMemoryManagerLocking(); State = Finalized; } diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h index 246d3e0..1b0488b 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h @@ -1,4 +1,4 @@ -//===-- OrcTargetSupport.h - Code to support specific targets --*- C++ -*-===// +//===-- OrcArchitectureSupport.h - Architecture support code ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,32 +7,76 @@ // -// Target specific code for Orc, e.g. callback assembly. +// Architecture specific code for Orc, e.g. callback assembly. // -// Target classes should be part of the JIT *target* process, not the host +// Architecture classes should be part of the JIT *target* process, not the host // process (except where you're doing hosted JITing and the two are one and the // same). // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H -#define LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H +#define LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H #include "IndirectionUtils.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/Process.h" namespace llvm { namespace orc { +/// Generic ORC Architecture support. +/// +/// This class can be substituted as the target architecture support class for +/// ORC templates that require one (e.g. IndirectStubsManagers). It does not +/// support lazy JITing however, and any attempt to use that functionality +/// will result in execution of an llvm_unreachable.
+class OrcGenericArchitecture { +public: + static const unsigned PointerSize = sizeof(uintptr_t); + static const unsigned TrampolineSize = 1; + static const unsigned ResolverCodeSize = 1; + + typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId); + + static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, + void *CallbackMgr) { + llvm_unreachable("writeResolverCode is not supported by the generic host " + "support class"); + } + + static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + unsigned NumTrampolines) { + llvm_unreachable("writeTrampolines is not supported by the generic host " + "support class"); + } + + class IndirectStubsInfo { + public: + const static unsigned StubSize = 1; + unsigned getNumStubs() const { llvm_unreachable("Not supported"); } + void *getStub(unsigned Idx) const { llvm_unreachable("Not supported"); } + void **getPtr(unsigned Idx) const { llvm_unreachable("Not supported"); } + }; + + static std::error_code emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal) { + llvm_unreachable("emitIndirectStubsBlock is not supported by the generic " + "host support class"); + } +}; + +/// @brief X86_64 support. +/// +/// X86_64 supports lazy JITing. class OrcX86_64 { public: - static const unsigned PageSize = 4096; static const unsigned PointerSize = 8; static const unsigned TrampolineSize = 8; static const unsigned ResolverCodeSize = 0x78; - typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, - void *TrampolineId); + typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId); /// @brief Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. @@ -49,16 +93,16 @@ public: /// makeIndirectStubsBlock function. class IndirectStubsInfo { friend class OrcX86_64; + public: const static unsigned StubSize = 8; - const static unsigned PtrSize = 8; IndirectStubsInfo() : NumStubs(0) {} IndirectStubsInfo(IndirectStubsInfo &&Other) : NumStubs(Other.NumStubs), StubsMem(std::move(Other.StubsMem)) { Other.NumStubs = 0; } - IndirectStubsInfo& operator=(IndirectStubsInfo &&Other) { + IndirectStubsInfo &operator=(IndirectStubsInfo &&Other) { NumStubs = Other.NumStubs; Other.NumStubs = 0; StubsMem = std::move(Other.StubsMem); @@ -70,17 +114,18 @@ public: /// @brief Get a pointer to the stub at the given index, which must be in /// the range 0 .. getNumStubs() - 1. - void* getStub(unsigned Idx) const { - return static_cast<uint64_t*>(StubsMem.base()) + Idx; + void *getStub(unsigned Idx) const { + return static_cast<uint64_t *>(StubsMem.base()) + Idx; } /// @brief Get a pointer to the implementation-pointer at the given index, /// which must be in the range 0 .. getNumStubs() - 1. - void** getPtr(unsigned Idx) const { + void **getPtr(unsigned Idx) const { char *PtrsBase = - static_cast<char*>(StubsMem.base()) + NumStubs * StubSize; - return reinterpret_cast<void**>(PtrsBase) + Idx; + static_cast<char *>(StubsMem.base()) + NumStubs * StubSize; + return reinterpret_cast<void **>(PtrsBase) + Idx; } + private: unsigned NumStubs; sys::OwningMemoryBlock StubsMem; @@ -100,4 +145,4 @@ public: } // End namespace orc. } // End namespace llvm.
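The architecture class is consumed as a compile-time parameter by the indirection utilities in this patch. A minimal usage sketch (not part of the patch itself; it assumes only the interfaces visible in this diff, with error handling elided):

  #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
  #include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"

  using namespace llvm;
  using namespace llvm::orc;

  // Build one exported stub named "foo" pointing at Impl. OrcX86_64 supplies
  // the stub layout and emission; swapping in OrcGenericArchitecture would
  // compile, but emitting stubs would hit llvm_unreachable, as documented.
  JITSymbol makeStubFor(TargetAddress Impl) {
    static LocalIndirectStubsManager<OrcX86_64> StubsMgr;
    if (auto EC = StubsMgr.createStub("foo", Impl, JITSymbolFlags::Exported))
      return nullptr; // Real code would propagate EC to the caller.
    return StubsMgr.findStub("foo", /*ExportedStubsOnly=*/true);
  }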
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H +#endif // LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h new file mode 100644 index 0000000..48f35d6 --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h @@ -0,0 +1,37 @@ +//===------ OrcError.h - Error codes and helpers for Orc ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Define an error category, error codes, and helper utilities for Orc. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCERROR_H +#define LLVM_EXECUTIONENGINE_ORC_ORCERROR_H + +#include <system_error> + +namespace llvm { +namespace orc { + +enum class OrcErrorCode : int { + // RPC Errors + RemoteAllocatorDoesNotExist = 1, + RemoteAllocatorIdAlreadyInUse, + RemoteMProtectAddrUnrecognized, + RemoteIndirectStubsOwnerDoesNotExist, + RemoteIndirectStubsOwnerIdAlreadyInUse, + UnexpectedRPCCall +}; + +std::error_code orcError(OrcErrorCode ErrCode); + +} // End namespace orc. +} // End namespace llvm. + +#endif // LLVM_EXECUTIONENGINE_ORC_ORCERROR_H diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h new file mode 100644 index 0000000..d7640b8 --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h @@ -0,0 +1,784 @@ +//===---- OrcRemoteTargetClient.h - Orc Remote-target Client ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the OrcRemoteTargetClient class and helpers. This class +// can be used to communicate over an RPCChannel with an OrcRemoteTargetServer +// instance to support remote-JITing. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H +#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H + +#include "IndirectionUtils.h" +#include "OrcRemoteTargetRPCAPI.h" +#include <system_error> + +#define DEBUG_TYPE "orc-remote" + +namespace llvm { +namespace orc { +namespace remote { + +/// This class provides utilities (including memory manager, indirect stubs +/// manager, and compile callback manager types) that support remote JITing +/// in ORC. +/// +/// Each of the utility classes talks to a JIT server (an instance of the +/// OrcRemoteTargetServer class) via an RPC system (see RPCUtils.h) to carry out +/// its actions. +template <typename ChannelT> +class OrcRemoteTargetClient : public OrcRemoteTargetRPCAPI { +public: + /// Remote memory manager.
+ class RCMemoryManager : public RuntimeDyld::MemoryManager { + public: + RCMemoryManager(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id) + : Client(Client), Id(Id) { + DEBUG(dbgs() << "Created remote allocator " << Id << "\n"); + } + + RCMemoryManager(RCMemoryManager &&Other) + : Client(std::move(Other.Client)), Id(std::move(Other.Id)), + Unmapped(std::move(Other.Unmapped)), + Unfinalized(std::move(Other.Unfinalized)) {} + + RCMemoryManager &operator=(RCMemoryManager &&Other) { + Client = std::move(Other.Client); + Id = std::move(Other.Id); + Unmapped = std::move(Other.Unmapped); + Unfinalized = std::move(Other.Unfinalized); + return *this; + } + + ~RCMemoryManager() { + Client.destroyRemoteAllocator(Id); + DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n"); + } + + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + StringRef SectionName) override { + Unmapped.back().CodeAllocs.emplace_back(Size, Alignment); + uint8_t *Alloc = reinterpret_cast<uint8_t *>( + Unmapped.back().CodeAllocs.back().getLocalAddress()); + DEBUG(dbgs() << "Allocator " << Id << " allocated code for " + << SectionName << ": " << Alloc << " (" << Size + << " bytes, alignment " << Alignment << ")\n"); + return Alloc; + } + + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, StringRef SectionName, + bool IsReadOnly) override { + if (IsReadOnly) { + Unmapped.back().RODataAllocs.emplace_back(Size, Alignment); + uint8_t *Alloc = reinterpret_cast<uint8_t *>( + Unmapped.back().RODataAllocs.back().getLocalAddress()); + DEBUG(dbgs() << "Allocator " << Id << " allocated ro-data for " + << SectionName << ": " << Alloc << " (" << Size + << " bytes, alignment " << Alignment << ")\n"); + return Alloc; + } // else... + + Unmapped.back().RWDataAllocs.emplace_back(Size, Alignment); + uint8_t *Alloc = reinterpret_cast<uint8_t *>( + Unmapped.back().RWDataAllocs.back().getLocalAddress()); + DEBUG(dbgs() << "Allocator " << Id << " allocated rw-data for " + << SectionName << ": " << Alloc << " (" << Size + << " bytes, alignment " << Alignment << ")\n"); + return Alloc; + } + + void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + uintptr_t RODataSize, uint32_t RODataAlign, + uintptr_t RWDataSize, + uint32_t RWDataAlign) override { + Unmapped.push_back(ObjectAllocs()); + + DEBUG(dbgs() << "Allocator " << Id << " reserved:\n"); + + if (CodeSize != 0) { + std::error_code EC = Client.reserveMem(Unmapped.back().RemoteCodeAddr, + Id, CodeSize, CodeAlign); + // FIXME: Add error to poll. + assert(!EC && "Failed reserving remote memory."); + (void)EC; + DEBUG(dbgs() << " code: " + << format("0x%016x", Unmapped.back().RemoteCodeAddr) + << " (" << CodeSize << " bytes, alignment " << CodeAlign + << ")\n"); + } + + if (RODataSize != 0) { + std::error_code EC = Client.reserveMem(Unmapped.back().RemoteRODataAddr, + Id, RODataSize, RODataAlign); + // FIXME: Add error to poll. + assert(!EC && "Failed reserving remote memory."); + (void)EC; + DEBUG(dbgs() << " ro-data: " + << format("0x%016x", Unmapped.back().RemoteRODataAddr) + << " (" << RODataSize << " bytes, alignment " + << RODataAlign << ")\n"); + } + + if (RWDataSize != 0) { + std::error_code EC = Client.reserveMem(Unmapped.back().RemoteRWDataAddr, + Id, RWDataSize, RWDataAlign); + // FIXME: Add error to poll.
+ assert(!EC && "Failed reserving remote memory."); + (void)EC; + DEBUG(dbgs() << " rw-data: " + << format("0x%016x", Unmapped.back().RemoteRWDataAddr) + << " (" << RWDataSize << " bytes, alignment " + << RWDataAlign << ")\n"); + } + } + + bool needsToReserveAllocationSpace() override { return true; } + + void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, + size_t Size) override {} + + void deregisterEHFrames(uint8_t *addr, uint64_t LoadAddr, + size_t Size) override {} + + void notifyObjectLoaded(RuntimeDyld &Dyld, + const object::ObjectFile &Obj) override { + DEBUG(dbgs() << "Allocator " << Id << " applied mappings:\n"); + for (auto &ObjAllocs : Unmapped) { + { + TargetAddress NextCodeAddr = ObjAllocs.RemoteCodeAddr; + for (auto &Alloc : ObjAllocs.CodeAllocs) { + NextCodeAddr = RoundUpToAlignment(NextCodeAddr, Alloc.getAlign()); + Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextCodeAddr); + DEBUG(dbgs() << " code: " + << static_cast<void *>(Alloc.getLocalAddress()) + << " -> " << format("0x%016x", NextCodeAddr) << "\n"); + Alloc.setRemoteAddress(NextCodeAddr); + NextCodeAddr += Alloc.getSize(); + } + } + { + TargetAddress NextRODataAddr = ObjAllocs.RemoteRODataAddr; + for (auto &Alloc : ObjAllocs.RODataAllocs) { + NextRODataAddr = + RoundUpToAlignment(NextRODataAddr, Alloc.getAlign()); + Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextRODataAddr); + DEBUG(dbgs() << " ro-data: " + << static_cast<void *>(Alloc.getLocalAddress()) + << " -> " << format("0x%016x", NextRODataAddr) + << "\n"); + Alloc.setRemoteAddress(NextRODataAddr); + NextRODataAddr += Alloc.getSize(); + } + } + { + TargetAddress NextRWDataAddr = ObjAllocs.RemoteRWDataAddr; + for (auto &Alloc : ObjAllocs.RWDataAllocs) { + NextRWDataAddr = + RoundUpToAlignment(NextRWDataAddr, Alloc.getAlign()); + Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextRWDataAddr); + DEBUG(dbgs() << " rw-data: " + << static_cast<void *>(Alloc.getLocalAddress()) + << " -> " << format("0x%016x", NextRWDataAddr) + << "\n"); + Alloc.setRemoteAddress(NextRWDataAddr); + NextRWDataAddr += Alloc.getSize(); + } + } + Unfinalized.push_back(std::move(ObjAllocs)); + } + Unmapped.clear(); + } + + bool finalizeMemory(std::string *ErrMsg = nullptr) override { + DEBUG(dbgs() << "Allocator " << Id << " finalizing:\n"); + + for (auto &ObjAllocs : Unfinalized) { + + for (auto &Alloc : ObjAllocs.CodeAllocs) { + DEBUG(dbgs() << " copying code: " + << static_cast<void *>(Alloc.getLocalAddress()) << " -> " + << format("0x%016x", Alloc.getRemoteAddress()) << " (" + << Alloc.getSize() << " bytes)\n"); + Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(), + Alloc.getSize()); + } + + if (ObjAllocs.RemoteCodeAddr) { + DEBUG(dbgs() << " setting R-X permissions on code block: " + << format("0x%016x", ObjAllocs.RemoteCodeAddr) << "\n"); + Client.setProtections(Id, ObjAllocs.RemoteCodeAddr, + sys::Memory::MF_READ | sys::Memory::MF_EXEC); + } + + for (auto &Alloc : ObjAllocs.RODataAllocs) { + DEBUG(dbgs() << " copying ro-data: " + << static_cast<void *>(Alloc.getLocalAddress()) << " -> " + << format("0x%016x", Alloc.getRemoteAddress()) << " (" + << Alloc.getSize() << " bytes)\n"); + Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(), + Alloc.getSize()); + } + + if (ObjAllocs.RemoteRODataAddr) { + DEBUG(dbgs() << " setting R-- permissions on ro-data block: " + << format("0x%016x", ObjAllocs.RemoteRODataAddr) + << "\n"); + Client.setProtections(Id, ObjAllocs.RemoteRODataAddr, + sys::Memory::MF_READ); + } + + for (auto &Alloc : 
ObjAllocs.RWDataAllocs) { + DEBUG(dbgs() << " copying rw-data: " + << static_cast<void *>(Alloc.getLocalAddress()) << " -> " + << format("0x%016x", Alloc.getRemoteAddress()) << " (" + << Alloc.getSize() << " bytes)\n"); + Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(), + Alloc.getSize()); + } + + if (ObjAllocs.RemoteRWDataAddr) { + DEBUG(dbgs() << " setting RW- permissions on rw-data block: " + << format("0x%016x", ObjAllocs.RemoteRWDataAddr) + << "\n"); + Client.setProtections(Id, ObjAllocs.RemoteRWDataAddr, + sys::Memory::MF_READ | sys::Memory::MF_WRITE); + } + } + Unfinalized.clear(); + + return false; + } + + private: + class Alloc { + public: + Alloc(uint64_t Size, unsigned Align) + : Size(Size), Align(Align), Contents(new char[Size + Align - 1]), + RemoteAddr(0) {} + + Alloc(Alloc &&Other) + : Size(std::move(Other.Size)), Align(std::move(Other.Align)), + Contents(std::move(Other.Contents)), + RemoteAddr(std::move(Other.RemoteAddr)) {} + + Alloc &operator=(Alloc &&Other) { + Size = std::move(Other.Size); + Align = std::move(Other.Align); + Contents = std::move(Other.Contents); + RemoteAddr = std::move(Other.RemoteAddr); + return *this; + } + + uint64_t getSize() const { return Size; } + + unsigned getAlign() const { return Align; } + + char *getLocalAddress() const { + uintptr_t LocalAddr = reinterpret_cast<uintptr_t>(Contents.get()); + LocalAddr = RoundUpToAlignment(LocalAddr, Align); + return reinterpret_cast<char *>(LocalAddr); + } + + void setRemoteAddress(TargetAddress RemoteAddr) { + this->RemoteAddr = RemoteAddr; + } + + TargetAddress getRemoteAddress() const { return RemoteAddr; } + + private: + uint64_t Size; + unsigned Align; + std::unique_ptr<char[]> Contents; + TargetAddress RemoteAddr; + }; + + struct ObjectAllocs { + ObjectAllocs() + : RemoteCodeAddr(0), RemoteRODataAddr(0), RemoteRWDataAddr(0) {} + + ObjectAllocs(ObjectAllocs &&Other) + : RemoteCodeAddr(std::move(Other.RemoteCodeAddr)), + RemoteRODataAddr(std::move(Other.RemoteRODataAddr)), + RemoteRWDataAddr(std::move(Other.RemoteRWDataAddr)), + CodeAllocs(std::move(Other.CodeAllocs)), + RODataAllocs(std::move(Other.RODataAllocs)), + RWDataAllocs(std::move(Other.RWDataAllocs)) {} + + ObjectAllocs &operator=(ObjectAllocs &&Other) { + RemoteCodeAddr = std::move(Other.RemoteCodeAddr); + RemoteRODataAddr = std::move(Other.RemoteRODataAddr); + RemoteRWDataAddr = std::move(Other.RemoteRWDataAddr); + CodeAllocs = std::move(Other.CodeAllocs); + RODataAllocs = std::move(Other.RODataAllocs); + RWDataAllocs = std::move(Other.RWDataAllocs); + return *this; + } + + TargetAddress RemoteCodeAddr; + TargetAddress RemoteRODataAddr; + TargetAddress RemoteRWDataAddr; + std::vector<Alloc> CodeAllocs, RODataAllocs, RWDataAllocs; + }; + + OrcRemoteTargetClient &Client; + ResourceIdMgr::ResourceId Id; + std::vector<ObjectAllocs> Unmapped; + std::vector<ObjectAllocs> Unfinalized; + }; + + /// Remote indirect stubs manager. 
+ class RCIndirectStubsManager : public IndirectStubsManager { + public: + RCIndirectStubsManager(OrcRemoteTargetClient &Remote, + ResourceIdMgr::ResourceId Id) + : Remote(Remote), Id(Id) {} + + ~RCIndirectStubsManager() { Remote.destroyIndirectStubsManager(Id); } + + std::error_code createStub(StringRef StubName, TargetAddress StubAddr, + JITSymbolFlags StubFlags) override { + if (auto EC = reserveStubs(1)) + return EC; + + return createStubInternal(StubName, StubAddr, StubFlags); + } + + std::error_code createStubs(const StubInitsMap &StubInits) override { + if (auto EC = reserveStubs(StubInits.size())) + return EC; + + for (auto &Entry : StubInits) + if (auto EC = createStubInternal(Entry.first(), Entry.second.first, + Entry.second.second)) + return EC; + + return std::error_code(); + } + + JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) override { + auto I = StubIndexes.find(Name); + if (I == StubIndexes.end()) + return nullptr; + auto Key = I->second.first; + auto Flags = I->second.second; + auto StubSymbol = JITSymbol(getStubAddr(Key), Flags); + if (ExportedStubsOnly && !StubSymbol.isExported()) + return nullptr; + return StubSymbol; + } + + JITSymbol findPointer(StringRef Name) override { + auto I = StubIndexes.find(Name); + if (I == StubIndexes.end()) + return nullptr; + auto Key = I->second.first; + auto Flags = I->second.second; + return JITSymbol(getPtrAddr(Key), Flags); + } + + std::error_code updatePointer(StringRef Name, + TargetAddress NewAddr) override { + auto I = StubIndexes.find(Name); + assert(I != StubIndexes.end() && "No stub pointer for symbol"); + auto Key = I->second.first; + return Remote.writePointer(getPtrAddr(Key), NewAddr); + } + + private: + struct RemoteIndirectStubsInfo { + RemoteIndirectStubsInfo(TargetAddress StubBase, TargetAddress PtrBase, + unsigned NumStubs) + : StubBase(StubBase), PtrBase(PtrBase), NumStubs(NumStubs) {} + TargetAddress StubBase; + TargetAddress PtrBase; + unsigned NumStubs; + }; + + OrcRemoteTargetClient &Remote; + ResourceIdMgr::ResourceId Id; + std::vector<RemoteIndirectStubsInfo> RemoteIndirectStubsInfos; + typedef std::pair<uint16_t, uint16_t> StubKey; + std::vector<StubKey> FreeStubs; + StringMap<std::pair<StubKey, JITSymbolFlags>> StubIndexes; + + std::error_code reserveStubs(unsigned NumStubs) { + if (NumStubs <= FreeStubs.size()) + return std::error_code(); + + unsigned NewStubsRequired = NumStubs - FreeStubs.size(); + TargetAddress StubBase; + TargetAddress PtrBase; + unsigned NumStubsEmitted; + + Remote.emitIndirectStubs(StubBase, PtrBase, NumStubsEmitted, Id, + NewStubsRequired); + + unsigned NewBlockId = RemoteIndirectStubsInfos.size(); + RemoteIndirectStubsInfos.push_back( + RemoteIndirectStubsInfo(StubBase, PtrBase, NumStubsEmitted)); + + for (unsigned I = 0; I < NumStubsEmitted; ++I) + FreeStubs.push_back(std::make_pair(NewBlockId, I)); + + return std::error_code(); + } + + std::error_code createStubInternal(StringRef StubName, + TargetAddress InitAddr, + JITSymbolFlags StubFlags) { + auto Key = FreeStubs.back(); + FreeStubs.pop_back(); + StubIndexes[StubName] = std::make_pair(Key, StubFlags); + return Remote.writePointer(getPtrAddr(Key), InitAddr); + } + + TargetAddress getStubAddr(StubKey K) { + assert(RemoteIndirectStubsInfos[K.first].StubBase != 0 && + "Missing stub address"); + return RemoteIndirectStubsInfos[K.first].StubBase + + K.second * Remote.getIndirectStubSize(); + } + + TargetAddress getPtrAddr(StubKey K) { + assert(RemoteIndirectStubsInfos[K.first].PtrBase != 0 && + "Missing pointer address"); 
+ return RemoteIndirectStubsInfos[K.first].PtrBase + + K.second * Remote.getPointerSize(); + } + }; + + /// Remote compile callback manager. + class RCCompileCallbackManager : public JITCompileCallbackManager { + public: + RCCompileCallbackManager(TargetAddress ErrorHandlerAddress, + OrcRemoteTargetClient &Remote) + : JITCompileCallbackManager(ErrorHandlerAddress), Remote(Remote) { + assert(!Remote.CompileCallback && "Compile callback already set"); + Remote.CompileCallback = [this](TargetAddress TrampolineAddr) { + return executeCompileCallback(TrampolineAddr); + }; + Remote.emitResolverBlock(); + } + + private: + void grow() { + TargetAddress BlockAddr = 0; + uint32_t NumTrampolines = 0; + auto EC = Remote.emitTrampolineBlock(BlockAddr, NumTrampolines); + assert(!EC && "Failed to create trampolines"); + + uint32_t TrampolineSize = Remote.getTrampolineSize(); + for (unsigned I = 0; I < NumTrampolines; ++I) + this->AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize)); + } + + OrcRemoteTargetClient &Remote; + }; + + /// Create an OrcRemoteTargetClient. + /// Channel is the ChannelT instance to communicate on. It is assumed that + /// the channel is ready to be read from and written to. + static ErrorOr<OrcRemoteTargetClient> Create(ChannelT &Channel) { + std::error_code EC; + OrcRemoteTargetClient H(Channel, EC); + if (EC) + return EC; + return H; + } + + /// Call the int(void) function at the given address in the target and return + /// its result. + std::error_code callIntVoid(int &Result, TargetAddress Addr) { + DEBUG(dbgs() << "Calling int(*)(void) " << format("0x%016x", Addr) << "\n"); + + if (auto EC = call<CallIntVoid>(Channel, Addr)) + return EC; + + unsigned NextProcId; + if (auto EC = listenForCompileRequests(NextProcId)) + return EC; + + if (NextProcId != CallIntVoidResponseId) + return orcError(OrcErrorCode::UnexpectedRPCCall); + + return handle<CallIntVoidResponse>(Channel, [&](int R) { + Result = R; + DEBUG(dbgs() << "Result: " << R << "\n"); + return std::error_code(); + }); + } + + /// Call the int(int, char*[]) function at the given address in the target and + /// return its result. + std::error_code callMain(int &Result, TargetAddress Addr, + const std::vector<std::string> &Args) { + DEBUG(dbgs() << "Calling int(*)(int, char*[]) " << format("0x%016x", Addr) + << "\n"); + + if (auto EC = call<CallMain>(Channel, Addr, Args)) + return EC; + + unsigned NextProcId; + if (auto EC = listenForCompileRequests(NextProcId)) + return EC; + + if (NextProcId != CallMainResponseId) + return orcError(OrcErrorCode::UnexpectedRPCCall); + + return handle<CallMainResponse>(Channel, [&](int R) { + Result = R; + DEBUG(dbgs() << "Result: " << R << "\n"); + return std::error_code(); + }); + } + + /// Call the void() function at the given address in the target and wait for + /// it to finish. + std::error_code callVoidVoid(TargetAddress Addr) { + DEBUG(dbgs() << "Calling void(*)(void) " << format("0x%016x", Addr) + << "\n"); + + if (auto EC = call<CallVoidVoid>(Channel, Addr)) + return EC; + + unsigned NextProcId; + if (auto EC = listenForCompileRequests(NextProcId)) + return EC; + + if (NextProcId != CallVoidVoidResponseId) + return orcError(OrcErrorCode::UnexpectedRPCCall); + + return handle<CallVoidVoidResponse>(Channel, doNothing); + } + + /// Create an RCMemoryManager which will allocate its memory on the remote + /// target. 
+ std::error_code + createRemoteMemoryManager(std::unique_ptr<RCMemoryManager> &MM) { + assert(!MM && "MemoryManager should be null before creation."); + + auto Id = AllocatorIds.getNext(); + if (auto EC = call<CreateRemoteAllocator>(Channel, Id)) + return EC; + MM = llvm::make_unique<RCMemoryManager>(*this, Id); + return std::error_code(); + } + + /// Create an RCIndirectStubsManager that will allocate stubs on the remote + /// target. + std::error_code + createIndirectStubsManager(std::unique_ptr<RCIndirectStubsManager> &I) { + assert(!I && "Indirect stubs manager should be null before creation."); + auto Id = IndirectStubOwnerIds.getNext(); + if (auto EC = call<CreateIndirectStubsOwner>(Channel, Id)) + return EC; + I = llvm::make_unique<RCIndirectStubsManager>(*this, Id); + return std::error_code(); + } + + /// Search for symbols in the remote process. Note: This should be used by + /// symbol resolvers *after* they've searched the local symbol table in the + /// JIT stack. + std::error_code getSymbolAddress(TargetAddress &Addr, StringRef Name) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + // Request remote symbol address. + if (auto EC = call<GetSymbolAddress>(Channel, Name)) + return EC; + + return expect<GetSymbolAddressResponse>(Channel, [&](TargetAddress &A) { + Addr = A; + DEBUG(dbgs() << "Remote address lookup " << Name << " = " + << format("0x%016x", Addr) << "\n"); + return std::error_code(); + }); + } + + /// Get the triple for the remote target. + const std::string &getTargetTriple() const { return RemoteTargetTriple; } + + std::error_code terminateSession() { return call<TerminateSession>(Channel); } + +private: + OrcRemoteTargetClient(ChannelT &Channel, std::error_code &EC) + : Channel(Channel), RemotePointerSize(0), RemotePageSize(0), + RemoteTrampolineSize(0), RemoteIndirectStubSize(0) { + if ((EC = call<GetRemoteInfo>(Channel))) + return; + + EC = expect<GetRemoteInfoResponse>( + Channel, readArgs(RemoteTargetTriple, RemotePointerSize, RemotePageSize, + RemoteTrampolineSize, RemoteIndirectStubSize)); + } + + void destroyRemoteAllocator(ResourceIdMgr::ResourceId Id) { + if (auto EC = call<DestroyRemoteAllocator>(Channel, Id)) { + // FIXME: This will be triggered by a removeModuleSet call: Propagate + // error return up through that. + llvm_unreachable("Failed to destroy remote allocator."); + } + AllocatorIds.release(Id); + } + + std::error_code destroyIndirectStubsManager(ResourceIdMgr::ResourceId Id) { + IndirectStubOwnerIds.release(Id); + return call<DestroyIndirectStubsOwner>(Channel, Id); + } + + std::error_code emitIndirectStubs(TargetAddress &StubBase, + TargetAddress &PtrBase, + uint32_t &NumStubsEmitted, + ResourceIdMgr::ResourceId Id, + uint32_t NumStubsRequired) { + if (auto EC = call<EmitIndirectStubs>(Channel, Id, NumStubsRequired)) + return EC; + + return expect<EmitIndirectStubsResponse>( + Channel, readArgs(StubBase, PtrBase, NumStubsEmitted)); + } + + std::error_code emitResolverBlock() { + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + return call<EmitResolverBlock>(Channel); + } + + std::error_code emitTrampolineBlock(TargetAddress &BlockAddr, + uint32_t &NumTrampolines) { + // Check for an 'out-of-band' error, e.g. from an MM destructor.
+ if (ExistingError) + return ExistingError; + + if (auto EC = call<EmitTrampolineBlock>(Channel)) + return EC; + + return expect<EmitTrampolineBlockResponse>( + Channel, [&](TargetAddress BAddr, uint32_t NTrampolines) { + BlockAddr = BAddr; + NumTrampolines = NTrampolines; + return std::error_code(); + }); + } + + uint32_t getIndirectStubSize() const { return RemoteIndirectStubSize; } + uint32_t getPageSize() const { return RemotePageSize; } + uint32_t getPointerSize() const { return RemotePointerSize; } + + uint32_t getTrampolineSize() const { return RemoteTrampolineSize; } + + std::error_code listenForCompileRequests(uint32_t &NextId) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + if (auto EC = getNextProcId(Channel, NextId)) + return EC; + + while (NextId == RequestCompileId) { + TargetAddress TrampolineAddr = 0; + if (auto EC = handle<RequestCompile>(Channel, readArgs(TrampolineAddr))) + return EC; + + TargetAddress ImplAddr = CompileCallback(TrampolineAddr); + if (auto EC = call<RequestCompileResponse>(Channel, ImplAddr)) + return EC; + + if (auto EC = getNextProcId(Channel, NextId)) + return EC; + } + + return std::error_code(); + } + + std::error_code readMem(char *Dst, TargetAddress Src, uint64_t Size) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + if (auto EC = call<ReadMem>(Channel, Src, Size)) + return EC; + + if (auto EC = expect<ReadMemResponse>( + Channel, [&]() { return Channel.readBytes(Dst, Size); })) + return EC; + + return std::error_code(); + } + + std::error_code reserveMem(TargetAddress &RemoteAddr, + ResourceIdMgr::ResourceId Id, uint64_t Size, + uint32_t Align) { + + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + if (std::error_code EC = call<ReserveMem>(Channel, Id, Size, Align)) + return EC; + + return expect<ReserveMemResponse>(Channel, readArgs(RemoteAddr)); + } + + std::error_code setProtections(ResourceIdMgr::ResourceId Id, + TargetAddress RemoteSegAddr, + unsigned ProtFlags) { + return call<SetProtections>(Channel, Id, RemoteSegAddr, ProtFlags); + } + + std::error_code writeMem(TargetAddress Addr, const char *Src, uint64_t Size) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. + if (ExistingError) + return ExistingError; + + // Make the send call. + if (auto EC = call<WriteMem>(Channel, Addr, Size)) + return EC; + + // Follow this up with the section contents. + if (auto EC = Channel.appendBytes(Src, Size)) + return EC; + + return Channel.send(); + } + + std::error_code writePointer(TargetAddress Addr, TargetAddress PtrVal) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. 
+ if (ExistingError) + return ExistingError; + + return call<WritePtr>(Channel, Addr, PtrVal); + } + + static std::error_code doNothing() { return std::error_code(); } + + ChannelT &Channel; + std::error_code ExistingError; + std::string RemoteTargetTriple; + uint32_t RemotePointerSize; + uint32_t RemotePageSize; + uint32_t RemoteTrampolineSize; + uint32_t RemoteIndirectStubSize; + ResourceIdMgr AllocatorIds, IndirectStubOwnerIds; + std::function<TargetAddress(TargetAddress)> CompileCallback; +}; + +} // end namespace remote +} // end namespace orc +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h new file mode 100644 index 0000000..96dc242 --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h @@ -0,0 +1,185 @@ +//===--- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Orc remote-target RPC API. It should not be used +// directly, but is used by the RemoteTargetClient and RemoteTargetServer +// classes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H +#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H + +#include "JITSymbol.h" +#include "RPCChannel.h" +#include "RPCUtils.h" + +namespace llvm { +namespace orc { +namespace remote { + +class OrcRemoteTargetRPCAPI : public RPC<RPCChannel> { +protected: + class ResourceIdMgr { + public: + typedef uint64_t ResourceId; + ResourceIdMgr() : NextId(0) {} + ResourceId getNext() { + if (!FreeIds.empty()) { + ResourceId I = FreeIds.back(); + FreeIds.pop_back(); + return I; + } + return NextId++; + } + void release(ResourceId I) { FreeIds.push_back(I); } + + private: + ResourceId NextId; + std::vector<ResourceId> FreeIds; + }; + +public: + enum JITProcId : uint32_t { + InvalidId = 0, + CallIntVoidId, + CallIntVoidResponseId, + CallMainId, + CallMainResponseId, + CallVoidVoidId, + CallVoidVoidResponseId, + CreateRemoteAllocatorId, + CreateIndirectStubsOwnerId, + DestroyRemoteAllocatorId, + DestroyIndirectStubsOwnerId, + EmitIndirectStubsId, + EmitIndirectStubsResponseId, + EmitResolverBlockId, + EmitTrampolineBlockId, + EmitTrampolineBlockResponseId, + GetSymbolAddressId, + GetSymbolAddressResponseId, + GetRemoteInfoId, + GetRemoteInfoResponseId, + ReadMemId, + ReadMemResponseId, + ReserveMemId, + ReserveMemResponseId, + RequestCompileId, + RequestCompileResponseId, + SetProtectionsId, + TerminateSessionId, + WriteMemId, + WritePtrId + }; + + static const char *getJITProcIdName(JITProcId Id); + + typedef Procedure<CallIntVoidId, TargetAddress /* FnAddr */> CallIntVoid; + + typedef Procedure<CallIntVoidResponseId, int /* Result */> + CallIntVoidResponse; + + typedef Procedure<CallMainId, TargetAddress /* FnAddr */, + std::vector<std::string> /* Args */> + CallMain; + + typedef Procedure<CallMainResponseId, int /* Result */> CallMainResponse; + + typedef Procedure<CallVoidVoidId, TargetAddress /* FnAddr */> CallVoidVoid; + + typedef Procedure<CallVoidVoidResponseId> CallVoidVoidResponse; + + typedef Procedure<CreateRemoteAllocatorId, + ResourceIdMgr::ResourceId /* 
Allocator ID */> + CreateRemoteAllocator; + + typedef Procedure<CreateIndirectStubsOwnerId, + ResourceIdMgr::ResourceId /* StubsOwner ID */> + CreateIndirectStubsOwner; + + typedef Procedure<DestroyRemoteAllocatorId, + ResourceIdMgr::ResourceId /* Allocator ID */> + DestroyRemoteAllocator; + + typedef Procedure<DestroyIndirectStubsOwnerId, + ResourceIdMgr::ResourceId /* StubsOwner ID */> + DestroyIndirectStubsOwner; + + typedef Procedure<EmitIndirectStubsId, + ResourceIdMgr::ResourceId /* StubsOwner ID */, + uint32_t /* NumStubsRequired */> + EmitIndirectStubs; + + typedef Procedure< + EmitIndirectStubsResponseId, TargetAddress /* StubsBaseAddr */, + TargetAddress /* PtrsBaseAddr */, uint32_t /* NumStubsEmitted */> + EmitIndirectStubsResponse; + + typedef Procedure<EmitResolverBlockId> EmitResolverBlock; + + typedef Procedure<EmitTrampolineBlockId> EmitTrampolineBlock; + + typedef Procedure<EmitTrampolineBlockResponseId, + TargetAddress /* BlockAddr */, + uint32_t /* NumTrampolines */> + EmitTrampolineBlockResponse; + + typedef Procedure<GetSymbolAddressId, std::string /*SymbolName*/> + GetSymbolAddress; + + typedef Procedure<GetSymbolAddressResponseId, uint64_t /* SymbolAddr */> + GetSymbolAddressResponse; + + typedef Procedure<GetRemoteInfoId> GetRemoteInfo; + + typedef Procedure<GetRemoteInfoResponseId, std::string /* Triple */, + uint32_t /* PointerSize */, uint32_t /* PageSize */, + uint32_t /* TrampolineSize */, + uint32_t /* IndirectStubSize */> + GetRemoteInfoResponse; + + typedef Procedure<ReadMemId, TargetAddress /* Src */, uint64_t /* Size */> + ReadMem; + + typedef Procedure<ReadMemResponseId> ReadMemResponse; + + typedef Procedure<ReserveMemId, ResourceIdMgr::ResourceId /* Id */, + uint64_t /* Size */, uint32_t /* Align */> + ReserveMem; + + typedef Procedure<ReserveMemResponseId, TargetAddress /* Addr */> + ReserveMemResponse; + + typedef Procedure<RequestCompileId, TargetAddress /* TrampolineAddr */> + RequestCompile; + + typedef Procedure<RequestCompileResponseId, TargetAddress /* ImplAddr */> + RequestCompileResponse; + + typedef Procedure<SetProtectionsId, ResourceIdMgr::ResourceId /* Id */, + TargetAddress /* Dst */, uint32_t /* ProtFlags */> + SetProtections; + + typedef Procedure<TerminateSessionId> TerminateSession; + + typedef Procedure<WriteMemId, TargetAddress /* Dst */, uint64_t /* Size */ + /* Data should follow */> + WriteMem; + + typedef Procedure<WritePtrId, TargetAddress /* Dst */, + TargetAddress /* Val */> + WritePtr; +}; + +} // end namespace remote +} // end namespace orc +} // end namespace llvm + +#endif diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h new file mode 100644 index 0000000..5247661 --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h @@ -0,0 +1,432 @@ +//===---- OrcRemoteTargetServer.h - Orc Remote-target Server ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the OrcRemoteTargetServer class. It can be used to build a +// JIT server that can execute code sent from an OrcRemoteTargetClient. 
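The server side pairs with the client above: it blocks reading procedure ids off the channel and dispatches each one. A hedged sketch of a typical serve loop (assumed usage, not part of this patch; ChannelT stands for any concrete RPCChannel implementation, e.g. one wrapping a socket, which the patch leaves to the embedder):

  #include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
  #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
  #include "llvm/Support/DynamicLibrary.h"

  using namespace llvm;
  using namespace llvm::orc;
  using namespace llvm::orc::remote;

  // Serve requests on Channel until the client terminates the session or an
  // error occurs. Symbols are resolved in the server process itself.
  template <typename ChannelT>
  std::error_code serve(ChannelT &Channel) {
    OrcRemoteTargetServer<ChannelT, OrcX86_64> Server(
        Channel, [](const std::string &Name) -> TargetAddress {
          return reinterpret_cast<uintptr_t>(
              sys::DynamicLibrary::SearchForAddressOfSymbol(Name));
        });
    while (true) {
      OrcRemoteTargetRPCAPI::JITProcId Id = OrcRemoteTargetRPCAPI::InvalidId;
      if (auto EC = Server.getNextProcId(Id))
        return EC;
      if (Id == OrcRemoteTargetRPCAPI::TerminateSessionId)
        return std::error_code();
      if (auto EC = Server.handleKnownProcedure(Id))
        return EC;
    }
  }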
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H +#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H + +#include "OrcRemoteTargetRPCAPI.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" +#include <map> + +#define DEBUG_TYPE "orc-remote" + +namespace llvm { +namespace orc { +namespace remote { + +template <typename ChannelT, typename TargetT> +class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI { +public: + typedef std::function<TargetAddress(const std::string &Name)> + SymbolLookupFtor; + + OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup) + : Channel(Channel), SymbolLookup(std::move(SymbolLookup)) {} + + std::error_code getNextProcId(JITProcId &Id) { + return deserialize(Channel, Id); + } + + std::error_code handleKnownProcedure(JITProcId Id) { + typedef OrcRemoteTargetServer ThisT; + + DEBUG(dbgs() << "Handling known proc: " << getJITProcIdName(Id) << "\n"); + + switch (Id) { + case CallIntVoidId: + return handle<CallIntVoid>(Channel, *this, &ThisT::handleCallIntVoid); + case CallMainId: + return handle<CallMain>(Channel, *this, &ThisT::handleCallMain); + case CallVoidVoidId: + return handle<CallVoidVoid>(Channel, *this, &ThisT::handleCallVoidVoid); + case CreateRemoteAllocatorId: + return handle<CreateRemoteAllocator>(Channel, *this, + &ThisT::handleCreateRemoteAllocator); + case CreateIndirectStubsOwnerId: + return handle<CreateIndirectStubsOwner>( + Channel, *this, &ThisT::handleCreateIndirectStubsOwner); + case DestroyRemoteAllocatorId: + return handle<DestroyRemoteAllocator>( + Channel, *this, &ThisT::handleDestroyRemoteAllocator); + case DestroyIndirectStubsOwnerId: + return handle<DestroyIndirectStubsOwner>( + Channel, *this, &ThisT::handleDestroyIndirectStubsOwner); + case EmitIndirectStubsId: + return handle<EmitIndirectStubs>(Channel, *this, + &ThisT::handleEmitIndirectStubs); + case EmitResolverBlockId: + return handle<EmitResolverBlock>(Channel, *this, + &ThisT::handleEmitResolverBlock); + case EmitTrampolineBlockId: + return handle<EmitTrampolineBlock>(Channel, *this, + &ThisT::handleEmitTrampolineBlock); + case GetSymbolAddressId: + return handle<GetSymbolAddress>(Channel, *this, + &ThisT::handleGetSymbolAddress); + case GetRemoteInfoId: + return handle<GetRemoteInfo>(Channel, *this, &ThisT::handleGetRemoteInfo); + case ReadMemId: + return handle<ReadMem>(Channel, *this, &ThisT::handleReadMem); + case ReserveMemId: + return handle<ReserveMem>(Channel, *this, &ThisT::handleReserveMem); + case SetProtectionsId: + return handle<SetProtections>(Channel, *this, + &ThisT::handleSetProtections); + case WriteMemId: + return handle<WriteMem>(Channel, *this, &ThisT::handleWriteMem); + case WritePtrId: + return handle<WritePtr>(Channel, *this, &ThisT::handleWritePtr); + default: + return orcError(OrcErrorCode::UnexpectedRPCCall); + } + + llvm_unreachable("Unhandled JIT RPC procedure Id."); + } + + std::error_code requestCompile(TargetAddress &CompiledFnAddr, + TargetAddress TrampolineAddr) { + if (auto EC = call<RequestCompile>(Channel, TrampolineAddr)) + return EC; + + while (1) { + JITProcId Id = InvalidId; + if (auto EC = getNextProcId(Id)) + return EC; + + switch (Id) { + case RequestCompileResponseId: + return handle<RequestCompileResponse>(Channel, + readArgs(CompiledFnAddr)); + default: + if (auto EC = 
handleKnownProcedure(Id)) + return EC; + } + } + + llvm_unreachable("Fell through request-compile command loop."); + } + +private: + struct Allocator { + Allocator() = default; + Allocator(Allocator &&Other) : Allocs(std::move(Other.Allocs)) {} + Allocator &operator=(Allocator &&Other) { + Allocs = std::move(Other.Allocs); + return *this; + } + + ~Allocator() { + for (auto &Alloc : Allocs) + sys::Memory::releaseMappedMemory(Alloc.second); + } + + std::error_code allocate(void *&Addr, size_t Size, uint32_t Align) { + std::error_code EC; + sys::MemoryBlock MB = sys::Memory::allocateMappedMemory( + Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); + if (EC) + return EC; + + Addr = MB.base(); + assert(Allocs.find(MB.base()) == Allocs.end() && "Duplicate alloc"); + Allocs[MB.base()] = std::move(MB); + return std::error_code(); + } + + std::error_code setProtections(void *block, unsigned Flags) { + auto I = Allocs.find(block); + if (I == Allocs.end()) + return orcError(OrcErrorCode::RemoteMProtectAddrUnrecognized); + return sys::Memory::protectMappedMemory(I->second, Flags); + } + + private: + std::map<void *, sys::MemoryBlock> Allocs; + }; + + static std::error_code doNothing() { return std::error_code(); } + + static TargetAddress reenter(void *JITTargetAddr, void *TrampolineAddr) { + TargetAddress CompiledFnAddr = 0; + + auto T = static_cast<OrcRemoteTargetServer *>(JITTargetAddr); + auto EC = T->requestCompile( + CompiledFnAddr, static_cast<TargetAddress>( + reinterpret_cast<uintptr_t>(TrampolineAddr))); + assert(!EC && "Compile request failed"); + (void)EC; + return CompiledFnAddr; + } + + std::error_code handleCallIntVoid(TargetAddress Addr) { + typedef int (*IntVoidFnTy)(); + IntVoidFnTy Fn = + reinterpret_cast<IntVoidFnTy>(static_cast<uintptr_t>(Addr)); + + DEBUG(dbgs() << " Calling " + << reinterpret_cast<void *>(reinterpret_cast<intptr_t>(Fn)) + << "\n"); + int Result = Fn(); + DEBUG(dbgs() << " Result = " << Result << "\n"); + + return call<CallIntVoidResponse>(Channel, Result); + } + + std::error_code handleCallMain(TargetAddress Addr, + std::vector<std::string> Args) { + typedef int (*MainFnTy)(int, const char *[]); + + MainFnTy Fn = reinterpret_cast<MainFnTy>(static_cast<uintptr_t>(Addr)); + int ArgC = Args.size() + 1; + int Idx = 1; + std::unique_ptr<const char *[]> ArgV(new const char *[ArgC + 1]); + ArgV[0] = "<jit process>"; + for (auto &Arg : Args) + ArgV[Idx++] = Arg.c_str(); + + DEBUG(dbgs() << " Calling " << reinterpret_cast<void *>(Fn) << "\n"); + int Result = Fn(ArgC, ArgV.get()); + DEBUG(dbgs() << " Result = " << Result << "\n"); + + return call<CallMainResponse>(Channel, Result); + } + + std::error_code handleCallVoidVoid(TargetAddress Addr) { + typedef void (*VoidVoidFnTy)(); + VoidVoidFnTy Fn = + reinterpret_cast<VoidVoidFnTy>(static_cast<uintptr_t>(Addr)); + + DEBUG(dbgs() << " Calling " << reinterpret_cast<void *>(Fn) << "\n"); + Fn(); + DEBUG(dbgs() << " Complete.\n"); + + return call<CallVoidVoidResponse>(Channel); + } + + std::error_code handleCreateRemoteAllocator(ResourceIdMgr::ResourceId Id) { + auto I = Allocators.find(Id); + if (I != Allocators.end()) + return orcError(OrcErrorCode::RemoteAllocatorIdAlreadyInUse); + DEBUG(dbgs() << " Created allocator " << Id << "\n"); + Allocators[Id] = Allocator(); + return std::error_code(); + } + + std::error_code handleCreateIndirectStubsOwner(ResourceIdMgr::ResourceId Id) { + auto I = IndirectStubsOwners.find(Id); + if (I != IndirectStubsOwners.end()) + return 
orcError(OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse); + DEBUG(dbgs() << " Create indirect stubs owner " << Id << "\n"); + IndirectStubsOwners[Id] = ISBlockOwnerList(); + return std::error_code(); + } + + std::error_code handleDestroyRemoteAllocator(ResourceIdMgr::ResourceId Id) { + auto I = Allocators.find(Id); + if (I == Allocators.end()) + return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist); + Allocators.erase(I); + DEBUG(dbgs() << " Destroyed allocator " << Id << "\n"); + return std::error_code(); + } + + std::error_code + handleDestroyIndirectStubsOwner(ResourceIdMgr::ResourceId Id) { + auto I = IndirectStubsOwners.find(Id); + if (I == IndirectStubsOwners.end()) + return orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist); + IndirectStubsOwners.erase(I); + return std::error_code(); + } + + std::error_code handleEmitIndirectStubs(ResourceIdMgr::ResourceId Id, + uint32_t NumStubsRequired) { + DEBUG(dbgs() << " ISMgr " << Id << " request " << NumStubsRequired + << " stubs.\n"); + + auto StubOwnerItr = IndirectStubsOwners.find(Id); + if (StubOwnerItr == IndirectStubsOwners.end()) + return orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist); + + typename TargetT::IndirectStubsInfo IS; + if (auto EC = + TargetT::emitIndirectStubsBlock(IS, NumStubsRequired, nullptr)) + return EC; + + TargetAddress StubsBase = + static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(IS.getStub(0))); + TargetAddress PtrsBase = + static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(IS.getPtr(0))); + uint32_t NumStubsEmitted = IS.getNumStubs(); + + auto &BlockList = StubOwnerItr->second; + BlockList.push_back(std::move(IS)); + + return call<EmitIndirectStubsResponse>(Channel, StubsBase, PtrsBase, + NumStubsEmitted); + } + + std::error_code handleEmitResolverBlock() { + std::error_code EC; + ResolverBlock = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( + TargetT::ResolverCodeSize, nullptr, + sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); + if (EC) + return EC; + + TargetT::writeResolverCode(static_cast<uint8_t *>(ResolverBlock.base()), + &reenter, this); + + return sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(), + sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + } + + std::error_code handleEmitTrampolineBlock() { + std::error_code EC; + auto TrampolineBlock = + sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( + sys::Process::getPageSize(), nullptr, + sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); + if (EC) + return EC; + + unsigned NumTrampolines = + (sys::Process::getPageSize() - TargetT::PointerSize) / + TargetT::TrampolineSize; + + uint8_t *TrampolineMem = static_cast<uint8_t *>(TrampolineBlock.base()); + TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(), + NumTrampolines); + + EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(), + sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + + TrampolineBlocks.push_back(std::move(TrampolineBlock)); + + return call<EmitTrampolineBlockResponse>( + Channel, + static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(TrampolineMem)), + NumTrampolines); + } + + std::error_code handleGetSymbolAddress(const std::string &Name) { + TargetAddress Addr = SymbolLookup(Name); + DEBUG(dbgs() << " Symbol '" << Name << "' = " << format("0x%016x", Addr) + << "\n"); + return call<GetSymbolAddressResponse>(Channel, Addr); + } + + std::error_code handleGetRemoteInfo() { + std::string ProcessTriple = sys::getProcessTriple(); + uint32_t PointerSize = TargetT::PointerSize; + 
uint32_t PageSize = sys::Process::getPageSize(); + uint32_t TrampolineSize = TargetT::TrampolineSize; + uint32_t IndirectStubSize = TargetT::IndirectStubsInfo::StubSize; + DEBUG(dbgs() << " Remote info:\n" + << " triple = '" << ProcessTriple << "'\n" + << " pointer size = " << PointerSize << "\n" + << " page size = " << PageSize << "\n" + << " trampoline size = " << TrampolineSize << "\n" + << " indirect stub size = " << IndirectStubSize << "\n"); + return call<GetRemoteInfoResponse>(Channel, ProcessTriple, PointerSize, + PageSize, TrampolineSize, + IndirectStubSize); + } + + std::error_code handleReadMem(TargetAddress RSrc, uint64_t Size) { + char *Src = reinterpret_cast<char *>(static_cast<uintptr_t>(RSrc)); + + DEBUG(dbgs() << " Reading " << Size << " bytes from " + << static_cast<void *>(Src) << "\n"); + + if (auto EC = call<ReadMemResponse>(Channel)) + return EC; + + if (auto EC = Channel.appendBytes(Src, Size)) + return EC; + + return Channel.send(); + } + + std::error_code handleReserveMem(ResourceIdMgr::ResourceId Id, uint64_t Size, + uint32_t Align) { + auto I = Allocators.find(Id); + if (I == Allocators.end()) + return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist); + auto &Allocator = I->second; + void *LocalAllocAddr = nullptr; + if (auto EC = Allocator.allocate(LocalAllocAddr, Size, Align)) + return EC; + + DEBUG(dbgs() << " Allocator " << Id << " reserved " << LocalAllocAddr + << " (" << Size << " bytes, alignment " << Align << ")\n"); + + TargetAddress AllocAddr = + static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(LocalAllocAddr)); + + return call<ReserveMemResponse>(Channel, AllocAddr); + } + + std::error_code handleSetProtections(ResourceIdMgr::ResourceId Id, + TargetAddress Addr, uint32_t Flags) { + auto I = Allocators.find(Id); + if (I == Allocators.end()) + return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist); + auto &Allocator = I->second; + void *LocalAddr = reinterpret_cast<void *>(static_cast<uintptr_t>(Addr)); + DEBUG(dbgs() << " Allocator " << Id << " set permissions on " << LocalAddr + << " to " << (Flags & sys::Memory::MF_READ ? 'R' : '-') + << (Flags & sys::Memory::MF_WRITE ? 'W' : '-') + << (Flags & sys::Memory::MF_EXEC ? 
'X' : '-') << "\n"); + return Allocator.setProtections(LocalAddr, Flags); + } + + std::error_code handleWriteMem(TargetAddress RDst, uint64_t Size) { + char *Dst = reinterpret_cast<char *>(static_cast<uintptr_t>(RDst)); + DEBUG(dbgs() << " Writing " << Size << " bytes to " + << format("0x%016x", RDst) << "\n"); + return Channel.readBytes(Dst, Size); + } + + std::error_code handleWritePtr(TargetAddress Addr, TargetAddress PtrVal) { + DEBUG(dbgs() << " Writing pointer *" << format("0x%016x", Addr) << " = " + << format("0x%016x", PtrVal) << "\n"); + uintptr_t *Ptr = + reinterpret_cast<uintptr_t *>(static_cast<uintptr_t>(Addr)); + *Ptr = static_cast<uintptr_t>(PtrVal); + return std::error_code(); + } + + ChannelT &Channel; + SymbolLookupFtor SymbolLookup; + std::map<ResourceIdMgr::ResourceId, Allocator> Allocators; + typedef std::vector<typename TargetT::IndirectStubsInfo> ISBlockOwnerList; + std::map<ResourceIdMgr::ResourceId, ISBlockOwnerList> IndirectStubsOwners; + sys::OwningMemoryBlock ResolverBlock; + std::vector<sys::OwningMemoryBlock> TrampolineBlocks; +}; + +} // end namespace remote +} // end namespace orc +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCChannel.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCChannel.h new file mode 100644 index 0000000..b97b6da --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCChannel.h @@ -0,0 +1,179 @@ +// -*- c++ -*- + +#ifndef LLVM_EXECUTIONENGINE_ORC_RPCCHANNEL_H +#define LLVM_EXECUTIONENGINE_ORC_RPCCHANNEL_H + +#include "OrcError.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" + +#include <system_error> + +namespace llvm { +namespace orc { +namespace remote { + +/// Interface for byte-streams to be used with RPC. +class RPCChannel { +public: + virtual ~RPCChannel() {} + + /// Read Size bytes from the stream into *Dst. + virtual std::error_code readBytes(char *Dst, unsigned Size) = 0; + + /// Read size bytes from *Src and append them to the stream. + virtual std::error_code appendBytes(const char *Src, unsigned Size) = 0; + + /// Flush the stream if possible. + virtual std::error_code send() = 0; +}; + +/// RPC channel serialization for a variadic list of arguments. +template <typename T, typename... Ts> +std::error_code serialize_seq(RPCChannel &C, const T &Arg, const Ts &... Args) { + if (auto EC = serialize(C, Arg)) + return EC; + return serialize_seq(C, Args...); +} + +/// RPC channel serialization for an (empty) variadic list of arguments. +inline std::error_code serialize_seq(RPCChannel &C) { + return std::error_code(); +} + +/// RPC channel deserialization for a variadic list of arguments. +template <typename T, typename... Ts> +std::error_code deserialize_seq(RPCChannel &C, T &Arg, Ts &... Args) { + if (auto EC = deserialize(C, Arg)) + return EC; + return deserialize_seq(C, Args...); +} + +/// RPC channel deserialization for an (empty) variadic list of arguments. +inline std::error_code deserialize_seq(RPCChannel &C) { + return std::error_code(); +} + +/// RPC channel serialization for integer primitives.
+template <typename T> +typename std::enable_if< + std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value || + std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value || + std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value || + std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, + std::error_code>::type +serialize(RPCChannel &C, T V) { + support::endian::byte_swap<T, support::big>(V); + return C.appendBytes(reinterpret_cast<const char *>(&V), sizeof(T)); +} + +/// RPC channel deserialization for integer primitives. +template <typename T> +typename std::enable_if< + std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value || + std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value || + std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value || + std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, + std::error_code>::type +deserialize(RPCChannel &C, T &V) { + if (auto EC = C.readBytes(reinterpret_cast<char *>(&V), sizeof(T))) + return EC; + support::endian::byte_swap<T, support::big>(V); + return std::error_code(); +} + +/// RPC channel serialization for enums. +template <typename T> +typename std::enable_if<std::is_enum<T>::value, std::error_code>::type +serialize(RPCChannel &C, T V) { + return serialize(C, static_cast<typename std::underlying_type<T>::type>(V)); +} + +/// RPC channel deserialization for enums. +template <typename T> +typename std::enable_if<std::is_enum<T>::value, std::error_code>::type +deserialize(RPCChannel &C, T &V) { + typename std::underlying_type<T>::type Tmp; + std::error_code EC = deserialize(C, Tmp); + V = static_cast<T>(Tmp); + return EC; +} + +/// RPC channel serialization for bools. +inline std::error_code serialize(RPCChannel &C, bool V) { + uint8_t VN = V ? 1 : 0; + return C.appendBytes(reinterpret_cast<const char *>(&VN), 1); +} + +/// RPC channel deserialization for bools. +inline std::error_code deserialize(RPCChannel &C, bool &V) { + uint8_t VN = 0; + if (auto EC = C.readBytes(reinterpret_cast<char *>(&VN), 1)) + return EC; + + V = (VN != 0) ? true : false; + return std::error_code(); +} + +/// RPC channel serialization for StringRefs. +/// Note: There is no corresponding deserialization for this, as StringRef +/// doesn't own its memory and so can't hold the deserialized data. +inline std::error_code serialize(RPCChannel &C, StringRef S) { + if (auto EC = serialize(C, static_cast<uint64_t>(S.size()))) + return EC; + return C.appendBytes((const char *)S.bytes_begin(), S.size()); +} + +/// RPC channel serialization for std::strings. +inline std::error_code serialize(RPCChannel &C, const std::string &S) { + return serialize(C, StringRef(S)); +} + +/// RPC channel deserialization for std::strings. +inline std::error_code deserialize(RPCChannel &C, std::string &S) { + uint64_t Count; + if (auto EC = deserialize(C, Count)) + return EC; + S.resize(Count); + return C.readBytes(&S[0], Count); } + +/// RPC channel serialization for ArrayRef<T>. +template <typename T> +std::error_code serialize(RPCChannel &C, const ArrayRef<T> &A) { + if (auto EC = serialize(C, static_cast<uint64_t>(A.size()))) + return EC; + + for (const auto &E : A) + if (auto EC = serialize(C, E)) + return EC; + + return std::error_code(); +} + +/// RPC channel serialization for std::vector<T>. +template <typename T> +std::error_code serialize(RPCChannel &C, const std::vector<T> &V) { + return serialize(C, ArrayRef<T>(V)); +} + +/// RPC channel deserialization for std::vector<T>.
+template <typename T> +std::error_code deserialize(RPCChannel &C, std::vector<T> &V) { + uint64_t Count = 0; + if (auto EC = deserialize(C, Count)) + return EC; + + V.resize(Count); + for (auto &E : V) + if (auto EC = deserialize(C, E)) + return EC; + + return std::error_code(); +} + +} // end namespace remote +} // end namespace orc +} // end namespace llvm + +#endif
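Taken together, the channel interface and the overloads above are all that is needed to move typed data: integers travel big-endian, and strings/vectors are length-prefixed with a uint64_t count. A minimal sketch of how they compose, assuming this header is on the include path; QueueChannel and the test driver are illustrative names, not part of this commit:

#include "llvm/ExecutionEngine/Orc/RPCChannel.h"
#include <algorithm>
#include <deque>
#include <string>

using namespace llvm::orc::remote;

// Hypothetical in-memory channel: appendBytes queues bytes in a FIFO,
// readBytes dequeues them, and send() is a no-op (no real transport).
class QueueChannel : public RPCChannel {
public:
  std::error_code readBytes(char *Dst, unsigned Size) override {
    if (Buffer.size() < Size)
      return std::make_error_code(std::errc::io_error); // underflow
    std::copy(Buffer.begin(), Buffer.begin() + Size, Dst);
    Buffer.erase(Buffer.begin(), Buffer.begin() + Size);
    return std::error_code();
  }
  std::error_code appendBytes(const char *Src, unsigned Size) override {
    Buffer.insert(Buffer.end(), Src, Src + Size);
    return std::error_code();
  }
  std::error_code send() override { return std::error_code(); }

private:
  std::deque<char> Buffer;
};

int main() {
  QueueChannel C;
  // Writer side: a big-endian u32 followed by a length-prefixed string.
  if (serialize(C, uint32_t(42)) || serialize(C, std::string("hello")))
    return 1;
  // Reader side sees the same wire format.
  uint32_t N;
  std::string S;
  if (deserialize(C, N) || deserialize(C, S))
    return 1;
  return (N == 42 && S == "hello") ? 0 : 1;
}

The same pattern scales to any transport (pipe, socket) by reimplementing the three virtual methods; everything above them is transport-agnostic.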
 diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h new file mode 100644 index 0000000..0bd5cbc --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h @@ -0,0 +1,266 @@ +//===----- RPCUtils.h - Basic utilities for building RPC APIs ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Basic utilities for building RPC APIs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H +#define LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H + +#include "llvm/ADT/STLExtras.h" + +namespace llvm { +namespace orc { +namespace remote { + +// Base class containing utilities that require partial specialization. +// These cannot be included in RPC, as template class members cannot be +// partially specialized. +class RPCBase { +protected: + template <typename ProcedureIdT, ProcedureIdT ProcId, typename... Ts> + class ProcedureHelper { + public: + static const ProcedureIdT Id = ProcId; + }; + + template <typename ChannelT, typename Proc> class CallHelper; + + template <typename ChannelT, typename ProcedureIdT, ProcedureIdT ProcId, + typename... ArgTs> + class CallHelper<ChannelT, ProcedureHelper<ProcedureIdT, ProcId, ArgTs...>> { + public: + static std::error_code call(ChannelT &C, const ArgTs &... Args) { + if (auto EC = serialize(C, ProcId)) + return EC; + // If you see a compile-error on this line you're probably calling a + // function with the wrong signature. + return serialize_seq(C, Args...); + } + }; + + template <typename ChannelT, typename Proc> class HandlerHelper; + + template <typename ChannelT, typename ProcedureIdT, ProcedureIdT ProcId, + typename... ArgTs> + class HandlerHelper<ChannelT, + ProcedureHelper<ProcedureIdT, ProcId, ArgTs...>> { + public: + template <typename HandlerT> + static std::error_code handle(ChannelT &C, HandlerT Handler) { + return readAndHandle(C, Handler, llvm::index_sequence_for<ArgTs...>()); + } + + private: + template <typename HandlerT, size_t... Is> + static std::error_code readAndHandle(ChannelT &C, HandlerT Handler, + llvm::index_sequence<Is...> _) { + std::tuple<ArgTs...> RPCArgs; + if (auto EC = deserialize_seq(C, std::get<Is>(RPCArgs)...)) + return EC; + return Handler(std::get<Is>(RPCArgs)...); + } + }; + + template <typename ClassT, typename... ArgTs> class MemberFnWrapper { + public: + typedef std::error_code (ClassT::*MethodT)(ArgTs...); + MemberFnWrapper(ClassT &Instance, MethodT Method) + : Instance(Instance), Method(Method) {} + std::error_code operator()(ArgTs &... Args) { + return (Instance.*Method)(Args...); + } + + private: + ClassT &Instance; + MethodT Method; + }; + + template <typename... ArgTs> class ReadArgs { + public: + std::error_code operator()() { return std::error_code(); } + }; + + template <typename ArgT, typename... ArgTs> + class ReadArgs<ArgT, ArgTs...> : public ReadArgs<ArgTs...> { + public: + ReadArgs(ArgT &Arg, ArgTs &... Args) + : ReadArgs<ArgTs...>(Args...), Arg(Arg) {} + + std::error_code operator()(ArgT &ArgVal, ArgTs &... ArgVals) { + this->Arg = std::move(ArgVal); + return ReadArgs<ArgTs...>::operator()(ArgVals...); + } + + private: + ArgT &Arg; + }; +}; + +/// Contains primitive utilities for defining, calling and handling calls to +/// remote procedures. ChannelT is a bidirectional stream conforming to the +/// RPCChannel interface (see RPCChannel.h), and ProcedureIdT is a procedure +/// identifier type that must be serializable on ChannelT. +/// +/// These utilities support the construction of very primitive RPC utilities. +/// Their intent is to ensure correct serialization and deserialization of +/// procedure arguments, and to keep the client and server's view of the API in +/// sync. +/// +/// These utilities do not support return values. These can be handled by +/// declaring a corresponding '.*Response' procedure and expecting it after a +/// call. They also do not support versioning: the client and server *must* be +/// compiled with the same procedure definitions. +/// +/// +/// +/// Overview (see comments on individual types/methods for details): +/// +/// Procedure<Id, Args...> : +/// +/// Associates a unique serializable id with an argument list. +/// +/// +/// call<Proc>(Channel, Args...) : +/// +/// Calls the remote procedure 'Proc' by serializing Proc's id followed by its +/// arguments and sending the resulting bytes to 'Channel'. +/// +/// +/// handle<Proc>(Channel, <functor matching std::error_code(Args...)>) : +/// +/// Handles a call to 'Proc' by deserializing its arguments and calling the +/// given functor. This assumes that the id for 'Proc' has already been +/// deserialized. +/// +/// expect<Proc>(Channel, <functor matching std::error_code(Args...)>) : +/// +/// The same as 'handle', except that the procedure id should not have been +/// read yet. Expect will deserialize the id and assert that it matches Proc's +/// id. If it does not, an unexpected RPC call error is returned. + +template <typename ChannelT, typename ProcedureIdT = uint32_t> +class RPC : public RPCBase { +public: + /// Utility class for defining/referring to RPC procedures. + /// + /// Typedefs of this utility are used when calling/handling remote procedures. + /// + /// ProcId should be a unique value of ProcedureIdT (i.e. not used with any + /// other Procedure typedef in the RPC API being defined). + /// + /// The template argument Ts... gives the argument list for the remote + /// procedure. + /// + /// E.g. + /// + /// typedef Procedure<0, bool> Proc1; + /// typedef Procedure<1, std::string, std::vector<int>> Proc2; + /// + /// if (auto EC = call<Proc1>(Channel, true)) + /// /* handle EC */; + /// + /// if (auto EC = expect<Proc2>(Channel, + /// [](std::string &S, std::vector<int> &V) { + /// // Stuff. + /// return std::error_code(); + /// })) + /// /* handle EC */; + /// + template <ProcedureIdT ProcId, typename... Ts> + using Procedure = ProcedureHelper<ProcedureIdT, ProcId, Ts...>; + + /// Serialize Args... to channel C, but do not call C.send(). + /// + /// For buffered channels, this can be used to queue up several calls before + /// flushing the channel. + template <typename Proc, typename... ArgTs> + static std::error_code appendCall(ChannelT &C, const ArgTs &... Args) { + return CallHelper<ChannelT, Proc>::call(C, Args...); + } + + /// Serialize Args... to channel C and call C.send().
+ template <typename Proc, typename... ArgTs> + static std::error_code call(ChannelT &C, const ArgTs &... Args) { + if (auto EC = appendCall<Proc>(C, Args...)) + return EC; + return C.send(); + } + + /// Deserialize the id of the next procedure call from C. + static std::error_code getNextProcId(ChannelT &C, ProcedureIdT &Id) { + return deserialize(C, Id); + } + + /// Deserialize args for Proc from C and call Handler. The signature of + /// handler must conform to 'std::error_code(Args...)' where Args... matches + /// the arguments used in the Proc typedef. + template <typename Proc, typename HandlerT> + static std::error_code handle(ChannelT &C, HandlerT Handler) { + return HandlerHelper<ChannelT, Proc>::handle(C, Handler); + } + + /// Helper version of 'handle' for calling member functions. + template <typename Proc, typename ClassT, typename... ArgTs> + static std::error_code + handle(ChannelT &C, ClassT &Instance, + std::error_code (ClassT::*HandlerMethod)(ArgTs...)) { + return handle<Proc>( + C, MemberFnWrapper<ClassT, ArgTs...>(Instance, HandlerMethod)); + } + + /// Deserialize a ProcedureIdT from C and verify it matches the id for Proc. + /// If the id does match, deserialize the arguments and call the handler + /// (similarly to handle). + /// If the id does not match, return an unexpected RPC call error and do not + /// deserialize any further bytes. + template <typename Proc, typename HandlerT> + static std::error_code expect(ChannelT &C, HandlerT Handler) { + ProcedureIdT ProcId; + if (auto EC = getNextProcId(C, ProcId)) + return EC; + if (ProcId != Proc::Id) + return orcError(OrcErrorCode::UnexpectedRPCCall); + return handle<Proc>(C, Handler); + } + + /// Helper version of expect for calling member functions. + template <typename Proc, typename ClassT, typename... ArgTs> + static std::error_code + expect(ChannelT &C, ClassT &Instance, + std::error_code (ClassT::*HandlerMethod)(ArgTs...)) { + return expect<Proc>( + C, MemberFnWrapper<ClassT, ArgTs...>(Instance, HandlerMethod)); + } + + /// Helper for handling setter procedures - this method returns a functor that + /// sets the variables referred to by Args... to values deserialized from the + /// channel. + /// E.g. + /// + /// typedef Procedure<0, bool, int> Proc1; + /// + /// ... + /// bool B; + /// int I; + /// if (auto EC = expect<Proc1>(Channel, readArgs(B, I))) + /// /* Handle Args */ ; + /// + template <typename... ArgTs> + static ReadArgs<ArgTs...> readArgs(ArgTs &... Args) { + return ReadArgs<ArgTs...>(Args...); + } +}; + +} // end namespace remote +} // end namespace orc +} // end namespace llvm + +#endif diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h index 207bad0..c500696 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h @@ -30,6 +30,10 @@ class ExecutionEngine; class MCJITMemoryManager : public RuntimeDyld::MemoryManager { public: + + // Don't hide the notifyObjectLoaded method from RuntimeDyld::MemoryManager. + using RuntimeDyld::MemoryManager::notifyObjectLoaded; + /// This method is called after an object has been loaded into memory but /// before relocations are applied to the loaded sections.
The object load /// may have been initiated by MCJIT to resolve an external symbol for another diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h index 385b8d0..100e97b 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -95,7 +95,9 @@ public: /// \brief Memory Management. class MemoryManager { + friend class RuntimeDyld; public: + MemoryManager() : FinalizationLocked(false) {} virtual ~MemoryManager() {} /// Allocate a memory block of (at least) the given size suitable for @@ -122,9 +124,11 @@ public: /// /// Note that by default the callback is disabled. To enable it /// redefine the method needsToReserveAllocationSpace to return true. - virtual void reserveAllocationSpace(uintptr_t CodeSize, - uintptr_t DataSizeRO, - uintptr_t DataSizeRW) {} + virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + uintptr_t RODataSize, + uint32_t RODataAlign, + uintptr_t RWDataSize, + uint32_t RWDataAlign) {} /// Override to return true to enable the reserveAllocationSpace callback. virtual bool needsToReserveAllocationSpace() { return false; } @@ -151,8 +155,23 @@ public: /// Returns true if an error occurred, false otherwise. virtual bool finalizeMemory(std::string *ErrMsg = nullptr) = 0; + /// This method is called after an object has been loaded into memory but + /// before relocations are applied to the loaded sections. + /// + /// Memory managers which are preparing code for execution in an external + /// address space can use this call to remap the section addresses for the + /// newly loaded object. + /// + /// For clients that do not need access to an ExecutionEngine instance this + /// method should be preferred to its cousin + /// MCJITMemoryManager::notifyObjectLoaded as this method is compatible with + /// ORC JIT stacks. + virtual void notifyObjectLoaded(RuntimeDyld &RTDyld, + const object::ObjectFile &Obj) {} + private: virtual void anchor(); + bool FinalizationLocked; }; /// \brief Symbol resolution. @@ -241,6 +260,25 @@ public: this->ProcessAllSections = ProcessAllSections; } + /// Perform all actions needed to make the code owned by this RuntimeDyld + /// instance executable: + /// + /// 1) Apply relocations. + /// 2) Register EH frames. + /// 3) Update memory permissions*. + /// + /// * Finalization is potentially recursive**, and the 3rd step will only be + /// applied by the outermost call to finalize. This allows different + /// RuntimeDyld instances to share a memory manager without the innermost + /// finalization locking the memory and causing relocation fixup errors in + /// outer instances. + /// + /// ** Recursive finalization occurs when one RuntimeDyld instance needs the + /// address of a symbol owned by some other instance in order to apply + /// relocations. + /// + void finalizeWithMemoryManagerLocking(); + private: // RuntimeDyldImpl is the actual class. RuntimeDyld is just the public // interface.
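The new reserveAllocationSpace signature carries per-segment alignments, and notifyObjectLoaded now lives on RuntimeDyld::MemoryManager itself, so ORC stacks can remap sections without an ExecutionEngine. A hedged sketch of a client manager using these hooks, assuming SectionMemoryManager is available; RemoteMapMemoryManager and the comment bodies are illustrative, not part of the commit:

#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"

using namespace llvm;

// Hypothetical manager preparing code to run in an external address space.
class RemoteMapMemoryManager : public SectionMemoryManager {
public:
  bool needsToReserveAllocationSpace() override { return true; }

  // New signature: sizes now arrive with per-segment alignments, so a
  // single up-front reservation can be laid out exactly.
  void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
                              uintptr_t RODataSize, uint32_t RODataAlign,
                              uintptr_t RWDataSize, uint32_t RWDataAlign) override {
    // Reserve one contiguous block large enough for all three segments
    // at their required alignments (sketch only).
  }

  // ORC-compatible hook: fires after load, before relocation.
  void notifyObjectLoaded(RuntimeDyld &RTDyld,
                          const object::ObjectFile &Obj) override {
    // e.g. call RTDyld.mapSectionAddress(LocalAddr, RemoteAddr) for each
    // section so relocations are computed against the target addresses.
  }
};

A client that shares one such manager across several RuntimeDyld instances would finish each instance with finalizeWithMemoryManagerLocking(), so that only the outermost call flips memory permissions and inner instances can still apply relocations.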
diff --git a/contrib/llvm/include/llvm/IR/Attributes.td b/contrib/llvm/include/llvm/IR/Attributes.td index 797cd55..30249bb 100644 --- a/contrib/llvm/include/llvm/IR/Attributes.td +++ b/contrib/llvm/include/llvm/IR/Attributes.td @@ -189,4 +189,9 @@ class MergeRule<string F> { string MergeFunc = F; } +def : MergeRule<"setAND<LessPreciseFPMADAttr>">; +def : MergeRule<"setAND<NoInfsFPMathAttr>">; +def : MergeRule<"setAND<NoNansFPMathAttr>">; +def : MergeRule<"setAND<UnsafeFPMathAttr>">; +def : MergeRule<"setOR<NoImplicitFloatAttr>">; def : MergeRule<"adjustCallerSSPLevel">; diff --git a/contrib/llvm/include/llvm/IR/Function.h b/contrib/llvm/include/llvm/IR/Function.h index 2a98393..4f64cae 100644 --- a/contrib/llvm/include/llvm/IR/Function.h +++ b/contrib/llvm/include/llvm/IR/Function.h @@ -66,7 +66,8 @@ private: * bit 2 : HasPrologueData * bit 3 : HasPersonalityFn * bits 4-13 : CallingConvention - * bits 14-15 : [reserved] + * bits 14 : HasGC + * bits 15 : [reserved] */ /// Bits from GlobalObject::GlobalObjectSubclassData. @@ -220,9 +221,11 @@ public: /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm /// to use during code generation. - bool hasGC() const; - const char *getGC() const; - void setGC(const char *Str); + bool hasGC() const { + return getSubclassDataFromValue() & (1<<14); + } + const std::string &getGC() const; + void setGC(const std::string Str); void clearGC(); /// @brief adds the attribute to the list of attributes. diff --git a/contrib/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm/include/llvm/IR/IRBuilder.h index a305054..1b75c60 100644 --- a/contrib/llvm/include/llvm/IR/IRBuilder.h +++ b/contrib/llvm/include/llvm/IR/IRBuilder.h @@ -178,10 +178,10 @@ public: void clearFastMathFlags() { FMF.clear(); } /// \brief Set the floating point math metadata to be used. - void SetDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; } + void setDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; } /// \brief Set the fast-math flags to be used with generated fp-math operators - void SetFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; } + void setFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; } //===--------------------------------------------------------------------===// // RAII helpers. diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm/include/llvm/IR/Intrinsics.td index 5a95ddc..f67029a 100644 --- a/contrib/llvm/include/llvm/IR/Intrinsics.td +++ b/contrib/llvm/include/llvm/IR/Intrinsics.td @@ -575,7 +575,7 @@ def int_experimental_gc_statepoint : Intrinsic<[llvm_token_ty], def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_token_ty], [IntrReadMem]>; -def int_experimental_gc_relocate : Intrinsic<[llvm_anyptr_ty], +def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty], [llvm_token_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td index 54bcbd8..8023a9f 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td @@ -1507,6 +1507,60 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pshuf_d_128 : + GCCBuiltin<"__builtin_ia32_pshufd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_i16_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshuf_d_256 : + GCCBuiltin<"__builtin_ia32_pshufd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_i16_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshuf_d_512 : + GCCBuiltin<"__builtin_ia32_pshufd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufh_w_128 : + GCCBuiltin<"__builtin_ia32_pshufhw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufh_w_256 : + GCCBuiltin<"__builtin_ia32_pshufhw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufh_w_512 : + GCCBuiltin<"__builtin_ia32_pshufhw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufl_w_128 : + GCCBuiltin<"__builtin_ia32_pshuflw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufl_w_256 : + GCCBuiltin<"__builtin_ia32_pshuflw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshufl_w_512 : + GCCBuiltin<"__builtin_ia32_pshuflw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_shuf_f32x4_256 : GCCBuiltin<"__builtin_ia32_shuf_f32x4_256_mask">, Intrinsic<[llvm_v8f32_ty], @@ -1836,25 +1890,69 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">, Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadArgMem]>; - def int_x86_avx512_mask_loadu_ps_512 : GCCBuiltin<"__builtin_ia32_loadups512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrReadArgMem]>; - def int_x86_avx512_mask_loadu_pd_512 : GCCBuiltin<"__builtin_ia32_loadupd512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrReadArgMem]>; - def int_x86_avx512_mask_load_ps_512 : GCCBuiltin<"__builtin_ia32_loadaps512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrReadArgMem]>; - def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrReadArgMem]>; - def int_x86_avx512_mask_move_ss : GCCBuiltin<"__builtin_ia32_movss_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_move_sd : GCCBuiltin<"__builtin_ia32_movsd_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem]>; + def int_x86_avx512_mask_loadu_ps_128 : + GCCBuiltin<"__builtin_ia32_loadups128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_ps_256 : + GCCBuiltin<"__builtin_ia32_loadups256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_ps_512 : + GCCBuiltin<"__builtin_ia32_loadups512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_loadu_pd_128 : + GCCBuiltin<"__builtin_ia32_loadupd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_pd_256 : + GCCBuiltin<"__builtin_ia32_loadupd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_pd_512 : + GCCBuiltin<"__builtin_ia32_loadupd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_load_ps_128 : + GCCBuiltin<"__builtin_ia32_loadaps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_load_ps_256 : + GCCBuiltin<"__builtin_ia32_loadaps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_load_ps_512 : + GCCBuiltin<"__builtin_ia32_loadaps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_load_pd_128 : + GCCBuiltin<"__builtin_ia32_loadapd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_load_pd_256 : + GCCBuiltin<"__builtin_ia32_loadapd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_load_pd_512 : + GCCBuiltin<"__builtin_ia32_loadapd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_move_ss : + GCCBuiltin<"__builtin_ia32_movss_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, 
llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_move_sd : + GCCBuiltin<"__builtin_ia32_movsd_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; } // Conditional store ops @@ -2262,6 +2360,46 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_w_128 : GCCBuiltin<"__builtin_ia32_psllw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_w_256 : GCCBuiltin<"__builtin_ia32_psllw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_w_512 : GCCBuiltin<"__builtin_ia32_psllw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_wi_128 : GCCBuiltin<"__builtin_ia32_psllwi128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_wi_256 : GCCBuiltin<"__builtin_ia32_psllwi256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_wi_512 : GCCBuiltin<"__builtin_ia32_psllwi512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv16_hi : GCCBuiltin<"__builtin_ia32_psllv16hi_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv2_di : GCCBuiltin<"__builtin_ia32_psllv2di_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv32hi : GCCBuiltin<"__builtin_ia32_psllv32hi_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv4_di : GCCBuiltin<"__builtin_ia32_psllv4di_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv4_si : GCCBuiltin<"__builtin_ia32_psllv4si_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv8_hi : GCCBuiltin<"__builtin_ia32_psllv8hi_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psllv8_si : GCCBuiltin<"__builtin_ia32_psllv8si_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_d_128 : GCCBuiltin<"__builtin_ia32_psrad128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; @@ -2823,6 +2961,28 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav16_hi : GCCBuiltin<"__builtin_ia32_psrav16hi_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav32_hi : GCCBuiltin<"__builtin_ia32_psrav32hi_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav4_si : GCCBuiltin<"__builtin_ia32_psrav4si_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav8_hi : GCCBuiltin<"__builtin_ia32_psrav8hi_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav8_si : GCCBuiltin<"__builtin_ia32_psrav8si_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav_q_128 : GCCBuiltin<"__builtin_ia32_psravq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrav_q_256 : GCCBuiltin<"__builtin_ia32_psravq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; @@ -2844,6 +3004,83 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_psrlv8_si : GCCBuiltin<"__builtin_ia32_psrlv8si_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prorv_d_256 : GCCBuiltin<"__builtin_ia32_prorvd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prorv_d_512 : GCCBuiltin<"__builtin_ia32_prorvd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prorv_q_128 : GCCBuiltin<"__builtin_ia32_prorvq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prorv_q_256 : GCCBuiltin<"__builtin_ia32_prorvq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prorv_q_512 : GCCBuiltin<"__builtin_ia32_prorvq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_prol_d_128 : GCCBuiltin<"__builtin_ia32_prold128_mask">, + Intrinsic<[llvm_v4i32_ty] , [llvm_v4i32_ty, + llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prol_d_256 : GCCBuiltin<"__builtin_ia32_prold256_mask">, + Intrinsic<[llvm_v8i32_ty] , [llvm_v8i32_ty, + llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prol_d_512 : GCCBuiltin<"__builtin_ia32_prold512_mask">, + Intrinsic<[llvm_v16i32_ty] , [llvm_v16i32_ty, + llvm_i8_ty, 
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prol_q_128 : GCCBuiltin<"__builtin_ia32_prolq128_mask">, + Intrinsic<[llvm_v2i64_ty] , [llvm_v2i64_ty, + llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prol_q_256 : GCCBuiltin<"__builtin_ia32_prolq256_mask">, + Intrinsic<[llvm_v4i64_ty] , [llvm_v4i64_ty, + llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prol_q_512 : GCCBuiltin<"__builtin_ia32_prolq512_mask">, + Intrinsic<[llvm_v8i64_ty] , [llvm_v8i64_ty, + llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + + def int_x86_avx512_mask_prolv_d_128 : GCCBuiltin<"__builtin_ia32_prolvd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prolv_d_256 : GCCBuiltin<"__builtin_ia32_prolvd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prolv_d_512 : GCCBuiltin<"__builtin_ia32_prolvd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prolv_q_128 : GCCBuiltin<"__builtin_ia32_prolvq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prolv_q_256 : GCCBuiltin<"__builtin_ia32_prolvq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_prolv_q_512 : GCCBuiltin<"__builtin_ia32_prolvq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_d_128 : GCCBuiltin<"__builtin_ia32_prord128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_d_256 : GCCBuiltin<"__builtin_ia32_prord256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_d_512 : GCCBuiltin<"__builtin_ia32_prord512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_q_128 : GCCBuiltin<"__builtin_ia32_prorq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_q_256 : GCCBuiltin<"__builtin_ia32_prorq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pror_q_512 : GCCBuiltin<"__builtin_ia32_prorq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + } // Gather ops @@ -4208,6 +4445,61 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pmovsxb_d_128 : GCCBuiltin<"__builtin_ia32_pmovsxbd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_d_256 : GCCBuiltin<"__builtin_ia32_pmovsxbd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_d_512 : GCCBuiltin<"__builtin_ia32_pmovsxbd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxbq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxbq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxbq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_w_128 : GCCBuiltin<"__builtin_ia32_pmovsxbw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_w_256 : GCCBuiltin<"__builtin_ia32_pmovsxbw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxb_w_512 : GCCBuiltin<"__builtin_ia32_pmovsxbw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i8_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxd_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxdq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxd_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxdq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxd_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxdq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_d_128 : GCCBuiltin<"__builtin_ia32_pmovsxwd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_d_256 : GCCBuiltin<"__builtin_ia32_pmovsxwd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_d_512 : GCCBuiltin<"__builtin_ia32_pmovsxwd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxwq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxwq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovsxw_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxwq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } // Conversion ops @@ -5319,6 +5611,62 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx512_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pmovzxb_d_128 : GCCBuiltin<"__builtin_ia32_pmovzxbd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_d_256 : GCCBuiltin<"__builtin_ia32_pmovzxbd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_d_512 : GCCBuiltin<"__builtin_ia32_pmovzxbd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxbq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxbq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxbq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_w_128 : GCCBuiltin<"__builtin_ia32_pmovzxbw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_w_256 : GCCBuiltin<"__builtin_ia32_pmovzxbw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxb_w_512 : GCCBuiltin<"__builtin_ia32_pmovzxbw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i8_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxd_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxdq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxd_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxdq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxd_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxdq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_d_128 : GCCBuiltin<"__builtin_ia32_pmovzxwd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_d_256 : GCCBuiltin<"__builtin_ia32_pmovzxwd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_d_512 : GCCBuiltin<"__builtin_ia32_pmovzxwd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxwq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxwq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmovzxw_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxwq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + } //Bitwise Ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
diff --git a/contrib/llvm/include/llvm/IR/LLVMContext.h b/contrib/llvm/include/llvm/IR/LLVMContext.h index c546fc3..56aa301 100644 --- a/contrib/llvm/include/llvm/IR/LLVMContext.h +++ b/contrib/llvm/include/llvm/IR/LLVMContext.h @@ -93,6 +93,17 @@ public: /// tag registered with an LLVMContext has an unique ID. uint32_t getOperandBundleTagID(StringRef Tag) const; + + /// Define the GC for a function + void setGC(const Function &Fn, std::string GCName); + + /// Return the GC for a function + const std::string &getGC(const Function &Fn); + + /// Remove the GC for a function + void deleteGC(const Function &Fn); + + typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context, unsigned LocCookie); diff --git a/contrib/llvm/include/llvm/IR/Metadata.h b/contrib/llvm/include/llvm/IR/Metadata.h index 4a8557d..df8ce35 100644 --- a/contrib/llvm/include/llvm/IR/Metadata.h +++ b/contrib/llvm/include/llvm/IR/Metadata.h @@ -915,11 +915,21 @@ public: /// \brief Resolve cycles. /// /// Once all forward declarations have been resolved, force cycles to be - /// resolved. If \p AllowTemps is true, then any temporary metadata - /// is ignored, otherwise it asserts when encountering temporary metadata. + /// resolved. This interface is used when there are no more temporaries, + /// and thus unresolved nodes are part of cycles and no longer need RAUW + /// support. /// /// \pre No operands (or operands' operands, etc.) have \a isTemporary(). - void resolveCycles(bool AllowTemps = false); + void resolveCycles() { resolveRecursivelyImpl(/* AllowTemps */ false); } + + /// \brief Resolve cycles while ignoring temporaries. + /// + /// This drops RAUW support for any temporaries, which can no longer + /// be uniqued. + /// + void resolveNonTemporaries() { + resolveRecursivelyImpl(/* AllowTemps */ true); + } /// \brief Replace a temporary node with a permanent one. /// @@ -977,6 +987,11 @@ private: void decrementUnresolvedOperandCount(); unsigned countUnresolvedOperands(); + /// Resolve cycles recursively. If \p AllowTemps is true, then any temporary + /// metadata is ignored, otherwise it asserts when encountering temporary + /// metadata. + void resolveRecursivelyImpl(bool AllowTemps); + /// \brief Mutate this to be "uniqued". /// /// Mutate this so that \a isUniqued(). 
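Tying together the Function and LLVMContext changes above: the GC name now lives in a context-owned map, while the function itself carries only a "has GC" flag (bit 14 of its subclass data). A minimal usage sketch, assuming headers from this tree; the module and function names are illustrative:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("gc-demo", Ctx);
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
      Function::ExternalLinkage, "f", &M);

  // setGC stores the string in the context-side map and sets the bit;
  // hasGC only tests the bit, so the common "no GC" case stays cheap.
  F->setGC("statepoint-example");
  if (F->hasGC())
    outs() << F->getGC() << "\n"; // prints "statepoint-example"
  F->clearGC();                   // drops the map entry and clears the bit
}

The design choice here is to move a rarely-used string out of every Function object, paying a map lookup only for functions that actually name a collector.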
diff --git a/contrib/llvm/include/llvm/InitializePasses.h b/contrib/llvm/include/llvm/InitializePasses.h index cb2b139..90fbc1d 100644 --- a/contrib/llvm/include/llvm/InitializePasses.h +++ b/contrib/llvm/include/llvm/InitializePasses.h @@ -132,7 +132,6 @@ void initializeEarlyCSELegacyPassPass(PassRegistry &); void initializeEliminateAvailableExternallyPass(PassRegistry&); void initializeExpandISelPseudosPass(PassRegistry&); void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&); -void initializeFunctionAttrsPass(PassRegistry&); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNPass(PassRegistry&); @@ -227,6 +226,7 @@ void initializePostDomOnlyViewerPass(PassRegistry&); void initializePostDomPrinterPass(PassRegistry&); void initializePostDomViewerPass(PassRegistry&); void initializePostDominatorTreePass(PassRegistry&); +void initializePostOrderFunctionAttrsPass(PassRegistry&); void initializePostRASchedulerPass(PassRegistry&); void initializePostMachineSchedulerPass(PassRegistry&); void initializePrintFunctionPassWrapperPass(PassRegistry&); @@ -242,6 +242,7 @@ void initializeRegionOnlyPrinterPass(PassRegistry&); void initializeRegionOnlyViewerPass(PassRegistry&); void initializeRegionPrinterPass(PassRegistry&); void initializeRegionViewerPass(PassRegistry&); +void initializeReversePostOrderFunctionAttrsPass(PassRegistry&); void initializeRewriteStatepointsForGCPass(PassRegistry&); void initializeSafeStackPass(PassRegistry&); void initializeSCCPPass(PassRegistry&); diff --git a/contrib/llvm/include/llvm/LinkAllPasses.h b/contrib/llvm/include/llvm/LinkAllPasses.h index e4192bd..6a40ca3 100644 --- a/contrib/llvm/include/llvm/LinkAllPasses.h +++ b/contrib/llvm/include/llvm/LinkAllPasses.h @@ -157,7 +157,8 @@ namespace { (void) llvm::createPostDomTree(); (void) llvm::createInstructionNamerPass(); (void) llvm::createMetaRenamerPass(); - (void) llvm::createFunctionAttrsPass(); + (void) llvm::createPostOrderFunctionAttrsPass(); + (void) llvm::createReversePostOrderFunctionAttrsPass(); (void) llvm::createMergeFunctionsPass(); (void) llvm::createPrintModulePass(*(llvm::raw_ostream*)nullptr); (void) llvm::createPrintFunctionPass(*(llvm::raw_ostream*)nullptr); diff --git a/contrib/llvm/include/llvm/Linker/Linker.h b/contrib/llvm/include/llvm/Linker/Linker.h index dde3f73..2b051e6 100644 --- a/contrib/llvm/include/llvm/Linker/Linker.h +++ b/contrib/llvm/include/llvm/Linker/Linker.h @@ -67,10 +67,9 @@ public: DenseMap<unsigned, MDNode *> *ValIDToTempMDMap); }; -/// Create a new module with exported local functions renamed and promoted -/// for ThinLTO. -std::unique_ptr<Module> renameModuleForThinLTO(std::unique_ptr<Module> M, - const FunctionInfoIndex *Index); +/// Perform in-place global value handling on the given Module for +/// exported local functions renamed and promoted for ThinLTO. 
+bool renameModuleForThinLTO(Module &M, const FunctionInfoIndex *Index); } // End llvm namespace diff --git a/contrib/llvm/include/llvm/MC/MCExpr.h b/contrib/llvm/include/llvm/MC/MCExpr.h index 1d6bdef..f6ccdc09 100644 --- a/contrib/llvm/include/llvm/MC/MCExpr.h +++ b/contrib/llvm/include/llvm/MC/MCExpr.h @@ -290,6 +290,9 @@ public: VK_Hexagon_LD_PLT, VK_Hexagon_IE, VK_Hexagon_IE_GOT, + + VK_WebAssembly_FUNCTION, // Function table index, rather than virtual addr + VK_TPREL, VK_DTPREL }; diff --git a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h index cf2c3f1..8a3a6af 100644 --- a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -92,6 +92,7 @@ protected: MCSection *DwarfLocSection; MCSection *DwarfARangesSection; MCSection *DwarfRangesSection; + MCSection *DwarfMacinfoSection; // The pubnames section is no longer generated by default. The generation // can be enabled by a compiler flag. MCSection *DwarfPubNamesSection; @@ -245,6 +246,7 @@ public: MCSection *getDwarfLocSection() const { return DwarfLocSection; } MCSection *getDwarfARangesSection() const { return DwarfARangesSection; } MCSection *getDwarfRangesSection() const { return DwarfRangesSection; } + MCSection *getDwarfMacinfoSection() const { return DwarfMacinfoSection; } // DWARF5 Experimental Debug Info Sections MCSection *getDwarfAccelNamesSection() const { diff --git a/contrib/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm/include/llvm/MC/MCStreamer.h index 494f02d..04d143f 100644 --- a/contrib/llvm/include/llvm/MC/MCStreamer.h +++ b/contrib/llvm/include/llvm/MC/MCStreamer.h @@ -131,6 +131,10 @@ public: void finish() override; + /// Reset any state between object emissions, i.e. the equivalent of + /// MCStreamer's reset method. + virtual void reset(); + /// Callback used to implement the ldr= pseudo. /// Add a new entry to the constant pool for the current section and return an /// MCExpr that can be used to refer to the constant pool location. diff --git a/contrib/llvm/include/llvm/Object/COFF.h b/contrib/llvm/include/llvm/Object/COFF.h index 1b0e2e3..3e69c3e 100644 --- a/contrib/llvm/include/llvm/Object/COFF.h +++ b/contrib/llvm/include/llvm/Object/COFF.h @@ -858,6 +858,9 @@ public: std::error_code getExportRVA(uint32_t &Result) const; std::error_code getSymbolName(StringRef &Result) const; + std::error_code isForwarder(bool &Result) const; + std::error_code getForwardTo(StringRef &Result) const; + private: const export_directory_table_entry *ExportTable; uint32_t Index; diff --git a/contrib/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm/include/llvm/Object/ELFObjectFile.h index 5823848..5d826da 100644 --- a/contrib/llvm/include/llvm/Object/ELFObjectFile.h +++ b/contrib/llvm/include/llvm/Object/ELFObjectFile.h @@ -842,6 +842,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const { case ELF::EM_SPARC: case ELF::EM_SPARC32PLUS: return "ELF32-sparc"; + case ELF::EM_WEBASSEMBLY: + return "ELF32-wasm"; default: return "ELF32-unknown"; } @@ -861,6 +863,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const { return "ELF64-sparc"; case ELF::EM_MIPS: return "ELF64-mips"; + case ELF::EM_WEBASSEMBLY: + return "ELF64-wasm"; default: return "ELF64-unknown"; } @@ -908,6 +912,12 @@ unsigned ELFObjectFile<ELFT>::getArch() const { return IsLittleEndian ? 
Triple::sparcel : Triple::sparc; case ELF::EM_SPARCV9: return Triple::sparcv9; + case ELF::EM_WEBASSEMBLY: + switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) { + case ELF::ELFCLASS32: return Triple::wasm32; + case ELF::ELFCLASS64: return Triple::wasm64; + default: return Triple::UnknownArch; + } default: return Triple::UnknownArch; diff --git a/contrib/llvm/include/llvm/Pass.h b/contrib/llvm/include/llvm/Pass.h index 3c4d838..99604cd 100644 --- a/contrib/llvm/include/llvm/Pass.h +++ b/contrib/llvm/include/llvm/Pass.h @@ -369,6 +369,10 @@ protected: /// @brief This is the storage for the -time-passes option. extern bool TimePassesIsEnabled; +/// isFunctionInPrintList - returns true if a function should be printed via +/// debugging options like -print-after-all/-print-before-all. +/// @brief Tells if the function IR should be printed by PrinterPass. +extern bool isFunctionInPrintList(StringRef FunctionName); } // End llvm namespace // Include support files that contain important APIs commonly used by Passes, diff --git a/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h b/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h index 3790e13..92a991e 100644 --- a/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h +++ b/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h @@ -20,13 +20,34 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" #include <system_error> #include <tuple> namespace llvm { +namespace coverage { +enum class coveragemap_error { + success = 0, + eof, + no_data_found, + unsupported_version, + truncated, + malformed +}; +} // end of coverage namespace. +} + +namespace std { +template <> +struct is_error_code_enum<llvm::coverage::coveragemap_error> : std::true_type { +}; +} + +namespace llvm { class IndexedInstrProfReader; namespace coverage { @@ -35,8 +56,6 @@ class CoverageMappingReader; class CoverageMapping; struct CounterExpressions; -enum CoverageMappingVersion { CoverageMappingVersion1 }; - /// \brief A Counter is an abstract value that describes how to compute the /// execution count for a region of code using the collected profile count data. struct Counter { @@ -454,6 +473,76 @@ public: CoverageData getCoverageForExpansion(const ExpansionRecord &Expansion); }; +const std::error_category &coveragemap_category(); + +inline std::error_code make_error_code(coveragemap_error E) { + return std::error_code(static_cast<int>(E), coveragemap_category()); +} + +// Profile coverage map has the following layout: +// [CoverageMapFileHeader] +// [ArrayStart] +// [CovMapFunctionRecord] +// [CovMapFunctionRecord] +// ... +// [ArrayEnd] +// [Encoded Region Mapping Data] +LLVM_PACKED_START +template <class IntPtrT> struct CovMapFunctionRecord { +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" + + // Return the structural hash associated with the function. + template <support::endianness Endian> uint64_t getFuncHash() const { + return support::endian::byte_swap<uint64_t, Endian>(FuncHash); + } + // Return the coverage map data size for the function. + template <support::endianness Endian> uint32_t getDataSize() const { + return support::endian::byte_swap<uint32_t, Endian>(DataSize); + } + // Return the function lookup key. The value is considered opaque.
+ template <support::endianness Endian> IntPtrT getFuncNameRef() const { + return support::endian::byte_swap<IntPtrT, Endian>(NamePtr); + } + // Return the PGO name of the function. + template <support::endianness Endian> + std::error_code getFuncName(InstrProfSymtab &ProfileNames, + StringRef &FuncName) const { + IntPtrT NameRef = getFuncNameRef<Endian>(); + uint32_t NameS = support::endian::byte_swap<uint32_t, Endian>(NameSize); + FuncName = ProfileNames.getFuncName(NameRef, NameS); + if (NameS && FuncName.empty()) + return coveragemap_error::malformed; + return std::error_code(); + } +}; +// Per module coverage mapping data header, i.e. CoverageMapFileHeader +// documented above. +struct CovMapHeader { +#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" + template <support::endianness Endian> uint32_t getNRecords() const { + return support::endian::byte_swap<uint32_t, Endian>(NRecords); + } + template <support::endianness Endian> uint32_t getFilenamesSize() const { + return support::endian::byte_swap<uint32_t, Endian>(FilenamesSize); + } + template <support::endianness Endian> uint32_t getCoverageSize() const { + return support::endian::byte_swap<uint32_t, Endian>(CoverageSize); + } + template <support::endianness Endian> uint32_t getVersion() const { + return support::endian::byte_swap<uint32_t, Endian>(Version); + } +}; + +LLVM_PACKED_END + +enum CoverageMappingVersion { + CoverageMappingVersion1 = 0, + // The current version is Version1. + CoverageMappingCurrentVersion = INSTR_PROF_COVMAP_VERSION +}; + } // end namespace coverage /// \brief Provide DenseMapInfo for CounterExpression @@ -484,26 +573,6 @@ template<> struct DenseMapInfo<coverage::CounterExpression> { } }; -const std::error_category &coveragemap_category(); - -enum class coveragemap_error { - success = 0, - eof, - no_data_found, - unsupported_version, - truncated, - malformed -}; - -inline std::error_code make_error_code(coveragemap_error E) { - return std::error_code(static_cast<int>(E), coveragemap_category()); -} - } // end namespace llvm -namespace std { -template <> -struct is_error_code_enum<llvm::coveragemap_error> : std::true_type {}; -} - #endif // LLVM_PROFILEDATA_COVERAGEMAPPING_H_ diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm/include/llvm/ProfileData/InstrProf.h index 49569d8..c84d8d2 100644 --- a/contrib/llvm/include/llvm/ProfileData/InstrProf.h +++ b/contrib/llvm/include/llvm/ProfileData/InstrProf.h @@ -30,7 +30,6 @@ #include <system_error> #include <vector> -#define INSTR_PROF_INDEX_VERSION 3 namespace llvm { class Function; @@ -66,7 +65,8 @@ inline StringRef getInstrProfValueProfFuncName() { /// Return the name of the section containing function coverage mapping /// data. inline StringRef getInstrProfCoverageSectionName(bool AddSegment) { - return AddSegment ? "__DATA,__llvm_covmap" : "__llvm_covmap"; + return AddSegment ? "__DATA," INSTR_PROF_COVMAP_SECT_NAME_STR + : INSTR_PROF_COVMAP_SECT_NAME_STR; } /// Return the name prefix of variables containing instrumented function names. @@ -89,6 +89,12 @@ inline StringRef getCoverageMappingVarName() { return "__llvm_coverage_mapping"; } +/// Return the name of the internal variable recording the array +/// of PGO name vars referenced by the coverage mapping. The owning +/// functions of those names are not emitted by FE (e.g., unused inline +/// functions).
+inline StringRef getCoverageNamesVarName() { return "__llvm_coverage_names"; } + /// Return the name of function that registers all the per-function control /// data at program startup time by calling __llvm_register_function. This /// function has internal linkage and is called by __llvm_profile_init @@ -349,11 +355,14 @@ struct InstrProfValueSiteRecord { return left.Value < right.Value; }); } + /// Sort ValueData Descending by Count + inline void sortByCount(); /// Merge data from another InstrProfValueSiteRecord /// Optionally scale merged counts by \p Weight. - instrprof_error mergeValueData(InstrProfValueSiteRecord &Input, - uint64_t Weight = 1); + instrprof_error merge(InstrProfValueSiteRecord &Input, uint64_t Weight = 1); + /// Scale up value profile data counts. + instrprof_error scale(uint64_t Weight); }; /// Profiling information for a single function. @@ -396,6 +405,19 @@ struct InstrProfRecord { /// Optionally scale merged counts by \p Weight. instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1); + /// Scale up profile counts (including value profile data) by + /// \p Weight. + instrprof_error scale(uint64_t Weight); + + /// Sort value profile data (per site) by count. + void sortValueData() { + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { + std::vector<InstrProfValueSiteRecord> &SiteRecords = + getValueSitesForKind(Kind); + for (auto &SR : SiteRecords) + SR.sortByCount(); + } + } /// Clear value data entries void clearValueData() { for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) @@ -430,6 +452,8 @@ private: // Scale merged value counts by \p Weight. instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src, uint64_t Weight); + // Scale up value profile data count. + instrprof_error scaleValueProfData(uint32_t ValueKind, uint64_t Weight); }; uint32_t InstrProfRecord::getNumValueKinds() const { @@ -497,11 +521,22 @@ inline support::endianness getHostEndianness() { #define INSTR_PROF_VALUE_PROF_DATA #include "llvm/ProfileData/InstrProfData.inc" - /* - * Initialize the record for runtime value profile data. - * Return 0 if the initialization is successful, otherwise - * return 1. - */ +void InstrProfValueSiteRecord::sortByCount() { + ValueData.sort( + [](const InstrProfValueData &left, const InstrProfValueData &right) { + return left.Count > right.Count; + }); + // Now truncate + size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE; + if (ValueData.size() > max_s) + ValueData.resize(max_s); +} + +/* +* Initialize the record for runtime value profile data. +* Return 0 if the initialization is successful, otherwise +* return 1. +*/ int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord, const uint16_t *NumValueSites, ValueProfNode **Nodes); @@ -597,31 +632,6 @@ struct Header { } // end namespace RawInstrProf -namespace coverage { - -// Profile coverage map has the following layout: -// [CoverageMapFileHeader] -// [ArrayStart] -// [CovMapFunctionRecord] -// [CovMapFunctionRecord] -// ... -// [ArrayEnd] -// [Encoded Region Mapping Data] -LLVM_PACKED_START -template <class IntPtrT> struct CovMapFunctionRecord { - #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; - #include "llvm/ProfileData/InstrProfData.inc" -}; -// Per module coverage mapping data header, i.e. CoverageMapFileHeader -// documented above. 
-struct CovMapHeader { -#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name; -#include "llvm/ProfileData/InstrProfData.inc" -}; - -LLVM_PACKED_END -} - } // end namespace llvm namespace std { diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc index 3a7c0c5..33c7d94 100644 --- a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -28,7 +28,7 @@ * * Examples of how the template is used to instantiate structure definition: * 1. To declare a structure: - * + * * struct ProfData { * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ * Type Name; @@ -155,7 +155,7 @@ VALUE_PROF_KIND(IPVK_Last, IPVK_IndirectCallTarget) #endif COVMAP_FUNC_RECORD(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), \ NamePtr, llvm::ConstantExpr::getBitCast(NamePtr, \ - llvm::Type::getInt8PtrTy(Ctx))) + llvm::Type::getInt8PtrTy(Ctx))) COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\ NameValue.size())) @@ -182,7 +182,7 @@ COVMAP_HEADER(uint32_t, Int32Ty, FilenamesSize, \ COVMAP_HEADER(uint32_t, Int32Ty, CoverageSize, \ llvm::ConstantInt::get(Int32Ty, CoverageMappingSize)) COVMAP_HEADER(uint32_t, Int32Ty, Version, \ - llvm::ConstantInt::get(Int32Ty, CoverageMappingVersion1)) + llvm::ConstantInt::get(Int32Ty, CoverageMappingCurrentVersion)) #undef COVMAP_HEADER /* COVMAP_HEADER end. */ @@ -190,7 +190,8 @@ COVMAP_HEADER(uint32_t, Int32Ty, Version, \ #ifdef INSTR_PROF_VALUE_PROF_DATA #define INSTR_PROF_DATA_DEFINED -/*! +#define INSTR_PROF_MAX_NUM_VAL_PER_SITE 255 +/*! * This is the header of the data structure that defines the on-disk * layout of the value profile data of a particular kind for one function. */ @@ -202,7 +203,7 @@ typedef struct ValueProfRecord { * otherwise the record for this kind won't be emitted. */ uint32_t NumValueSites; - /* + /* * The first element of the array that stores the number of profiled * values for each value site. The size of the array is NumValueSites. * Since NumValueSites is greater than zero, there is at least one @@ -226,7 +227,7 @@ typedef struct ValueProfRecord { * \brief Return the number of value sites. */ uint32_t getNumValueSites() const { return NumValueSites; } - /*! + /*! * \brief Read data from this record and save it to Record. */ void deserializeTo(InstrProfRecord &Record, @@ -247,10 +248,10 @@ typedef struct ValueProfRecord { typedef struct ValueProfData { /* * Total size in bytes including this field. It must be a multiple - * of sizeof(uint64_t). + * of sizeof(uint64_t). */ uint32_t TotalSize; - /* + /* * The number of value profile kinds that have value profile data. * In this implementation, a value profile kind is considered to * have profile data if the number of value profile sites for the @@ -260,7 +261,7 @@ typedef struct ValueProfData { */ uint32_t NumValueKinds; - /* + /* * Following are a sequence of variable length records. The prefix/header * of each record is defined by ValueProfRecord type. The number of * records is NumValueKinds.
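Editorial aside: the new INSTR_PROF_MAX_NUM_VAL_PER_SITE cap and InstrProfValueSiteRecord::sortByCount above combine into a simple policy: keep each site's value records sorted hottest-first and drop everything past the cap. A rough standalone equivalent using std::vector (the real code sorts a std::list in place; the type and function names here are illustrative):

#include <algorithm>
#include <cstdint>
#include <vector>

struct ValueData {
  uint64_t Value;
  uint64_t Count;
};

constexpr size_t MaxValuesPerSite = 255; // INSTR_PROF_MAX_NUM_VAL_PER_SITE

// Keep only the hottest values recorded for one profiling site.
void sortByCount(std::vector<ValueData> &Site) {
  std::stable_sort(Site.begin(), Site.end(),
                   [](const ValueData &L, const ValueData &R) {
                     return L.Count > R.Count; // descending by count
                   });
  if (Site.size() > MaxValuesPerSite)
    Site.resize(MaxValuesPerSite); // discard the cold tail
}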
@@ -314,7 +315,7 @@ typedef struct ValueProfData { #endif } ValueProfData; -/* +/* * The closure is designed to abstract away two types of value profile data: * - InstrProfRecord which is the primary data structure used to * represent profile data in host tools (reader, writer, and profile-use) @@ -335,7 +336,7 @@ typedef struct ValueProfRecordClosure { uint32_t (*GetNumValueData)(const void *Record, uint32_t VKind); uint32_t (*GetNumValueDataForSite)(const void *R, uint32_t VK, uint32_t S); - /* + /* * After extracting the value profile data from the value profile record, * this method is used to map the in-memory value to on-disk value. If * the method is null, value will be written out untranslated. @@ -346,7 +347,7 @@ typedef struct ValueProfRecordClosure { ValueProfData *(*AllocValueProfData)(size_t TotalSizeInBytes); } ValueProfRecordClosure; -/* +/* * A wrapper struct that represents value profile runtime data. * Like InstrProfRecord class which is used by profiling host tools, * ValueProfRuntimeRecord also implements the abstract interfaces defined in @@ -384,7 +385,7 @@ serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, uint32_t getNumValueKindsRT(const void *R); #undef INSTR_PROF_VALUE_PROF_DATA -#endif /* INSTR_PROF_VALUE_PROF_DATA */ +#endif /* INSTR_PROF_VALUE_PROF_DATA */ #ifdef INSTR_PROF_COMMON_API_IMPL @@ -412,7 +413,7 @@ uint32_t getValueProfRecordHeaderSize(uint32_t NumValueSites) { return Size; } -/*! +/*! * \brief Return the total size of the value profile record including the * header and the value data. */ @@ -432,7 +433,7 @@ InstrProfValueData *getValueProfRecordValueData(ValueProfRecord *This) { This->NumValueSites)); } -/*! +/*! * \brief Return the total number of value data for \c This record. */ INSTR_PROF_INLINE @@ -444,7 +445,7 @@ uint32_t getValueProfRecordNumValueData(ValueProfRecord *This) { return NumValueData; } -/*! +/*! * \brief Use this method to advance to the next \c This \c ValueProfRecord. */ INSTR_PROF_INLINE @@ -465,7 +466,7 @@ ValueProfRecord *getFirstValueProfRecord(ValueProfData *This) { /* Closure based interfaces. */ -/*! +/*! * Return the total size in bytes of the on-disk value profile data * given the data stored in Record. */ @@ -535,7 +536,7 @@ ValueProfData *serializeValueProfDataFrom(ValueProfRecordClosure *Closure, return VPD; } -/* +/* * The value profiler runtime library stores the value profile data * for a given function in \c NumValueSites and \c Nodes structures. * \c ValueProfRuntimeRecord class is used to encapsulate the runtime @@ -639,7 +640,7 @@ static ValueProfRecordClosure RTRecordClosure = {0, getValueForSiteRT, allocValueProfDataRT}; -/* +/* * Return the size of ValueProfData structure to store data * recorded in the runtime record. */ @@ -648,7 +649,7 @@ uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record) { return getValueProfDataSize(&RTRecordClosure); } -/* +/* * Return a ValueProfData instance that stores the data collected * from runtime. If \c DstData is provided by the caller, the value * profile data will be stored in *DstData and DstData is returned, @@ -696,18 +697,31 @@ serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, /* Raw profile format version. */ #define INSTR_PROF_RAW_VERSION 2 +#define INSTR_PROF_INDEX_VERSION 3 +#define INSTR_PROF_COVMAP_VERSION 0 + +/* Profile version is always of type uint64_t. Reserve the upper 8 bits in the + * version for other variants of profile. We set the lowest bit of the upper 8 + * bits (i.e.
bit 56) to 1 to indicate if this is an IR-level instrumentation + * generated profile, and 0 if this is a Clang FE generated profile. + */ +#define VARIANT_MASKS_ALL 0xff00000000000000ULL +#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) /* Runtime section names and name strings. */ #define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data #define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names #define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts +#define INSTR_PROF_COVMAP_SECT_NAME __llvm_covmap -#define INSTR_PROF_DATA_SECT_NAME_STR \ - INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME) -#define INSTR_PROF_NAME_SECT_NAME_STR \ - INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME) -#define INSTR_PROF_CNTS_SECT_NAME_STR \ - INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME) +#define INSTR_PROF_DATA_SECT_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME) +#define INSTR_PROF_NAME_SECT_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME) +#define INSTR_PROF_CNTS_SECT_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME) +#define INSTR_PROF_COVMAP_SECT_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_SECT_NAME) /* Macros to define start/stop section symbol for a given * section on Linux. For instance @@ -751,4 +765,3 @@ typedef struct ValueProfNode { #else #undef INSTR_PROF_DATA_DEFINED #endif - diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProf.h b/contrib/llvm/include/llvm/ProfileData/SampleProf.h index 8df3fe8..6c39cf9 100644 --- a/contrib/llvm/include/llvm/ProfileData/SampleProf.h +++ b/contrib/llvm/include/llvm/ProfileData/SampleProf.h @@ -140,16 +140,9 @@ public: /// around unsigned integers. sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { bool Overflowed; - if (Weight > 1) { - S = SaturatingMultiply(S, Weight, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - } - NumSamples = SaturatingAdd(NumSamples, S, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - - return sampleprof_error::success; + NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; } /// Add called function \p F with samples \p S. @@ -161,16 +154,10 @@ public: uint64_t Weight = 1) { uint64_t &TargetSamples = CallTargets[F]; bool Overflowed; - if (Weight > 1) { - S = SaturatingMultiply(S, Weight, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - } - TargetSamples = SaturatingAdd(TargetSamples, S, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - - return sampleprof_error::success; + TargetSamples = + SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; } /// Return true if this sample record contains function calls. @@ -215,29 +202,17 @@ public: void dump() const; sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { bool Overflowed; - if (Weight > 1) { - Num = SaturatingMultiply(Num, Weight, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - } - TotalSamples = SaturatingAdd(TotalSamples, Num, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - - return sampleprof_error::success; + TotalSamples = + SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed); + return Overflowed ?
sampleprof_error::counter_overflow + : sampleprof_error::success; } sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { bool Overflowed; - if (Weight > 1) { - Num = SaturatingMultiply(Num, Weight, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - } - TotalHeadSamples = SaturatingAdd(TotalHeadSamples, Num, &Overflowed); - if (Overflowed) - return sampleprof_error::counter_overflow; - - return sampleprof_error::success; + TotalHeadSamples = + SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; } sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, uint64_t Num, uint64_t Weight = 1) { diff --git a/contrib/llvm/include/llvm/Support/Allocator.h b/contrib/llvm/include/llvm/Support/Allocator.h index c608736..043d823 100644 --- a/contrib/llvm/include/llvm/Support/Allocator.h +++ b/contrib/llvm/include/llvm/Support/Allocator.h @@ -187,6 +187,7 @@ public: /// \brief Deallocate all but the current slab and reset the current pointer /// to the beginning of it, freeing all memory allocated so far. void Reset() { + // Deallocate all but the first slab, and deallocate all custom-sized slabs. DeallocateCustomSizedSlabs(); CustomSizedSlabs.clear(); @@ -198,7 +199,7 @@ public: CurPtr = (char *)Slabs.front(); End = CurPtr + SlabSize; - // Deallocate all but the first slab, and deallocate all custom-sized slabs. + __asan_poison_memory_region(*Slabs.begin(), computeSlabSize(0)); DeallocateSlabs(std::next(Slabs.begin()), Slabs.end()); Slabs.erase(std::next(Slabs.begin()), Slabs.end()); } diff --git a/contrib/llvm/include/llvm/Support/COFF.h b/contrib/llvm/include/llvm/Support/COFF.h index 0162175..0245632 100644 --- a/contrib/llvm/include/llvm/Support/COFF.h +++ b/contrib/llvm/include/llvm/Support/COFF.h @@ -656,6 +656,15 @@ namespace COFF { } }; + enum CodeViewLine : unsigned { + CVL_LineNumberStartBits = 24, + CVL_LineNumberEndDeltaBits = 7, + CVL_LineNumberEndDeltaMask = (1U << CVL_LineNumberEndDeltaBits) - 1, + CVL_MaxLineNumber = (1U << CVL_LineNumberStartBits) - 1, + CVL_IsStatement = 1U << 31, + CVL_MaxColumnNumber = UINT16_MAX, + }; + enum CodeViewIdentifiers { DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS = 0x1, DEBUG_SECTION_MAGIC = 0x4, diff --git a/contrib/llvm/include/llvm/Support/ELF.h b/contrib/llvm/include/llvm/Support/ELF.h index 97708a7..e24420f 100644 --- a/contrib/llvm/include/llvm/Support/ELF.h +++ b/contrib/llvm/include/llvm/Support/ELF.h @@ -309,7 +309,12 @@ enum { EM_COOL = 217, // iCelero CoolEngine EM_NORC = 218, // Nanoradio Optimized RISC EM_CSR_KALIMBA = 219, // CSR Kalimba architecture family - EM_AMDGPU = 224 // AMD GPU architecture + EM_AMDGPU = 224, // AMD GPU architecture + + // A request has been made to the maintainer of the official registry for + // such numbers for an official value for WebAssembly. As soon as one is + // allocated, this enum will be updated to use it. + EM_WEBASSEMBLY = 0x4157, // WebAssembly architecture }; // Object file classes. @@ -594,6 +599,11 @@ enum { #include "ELFRelocs/Sparc.def" }; +// ELF Relocation types for WebAssembly +enum { +#include "ELFRelocs/WebAssembly.def" +}; + #undef ELF_RELOC // Section header. 
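Editorial aside: the CodeViewLine constants added to COFF.h above describe a packed 32-bit CodeView line entry: the start line in the low 24 bits, a 7-bit delta to the end line above that, and an is-statement flag in bit 31. A hedged sketch of packing and unpacking such a word; the helper names are mine, not LLVM's:

#include <cassert>
#include <cstdint>

constexpr unsigned LineNumberStartBits = 24;    // CVL_LineNumberStartBits
constexpr unsigned LineNumberEndDeltaBits = 7;  // CVL_LineNumberEndDeltaBits
constexpr uint32_t LineNumberEndDeltaMask =
    (1u << LineNumberEndDeltaBits) - 1;
constexpr uint32_t MaxLineNumber = (1u << LineNumberStartBits) - 1;
constexpr uint32_t IsStatement = 1u << 31;      // CVL_IsStatement

// Pack a start line, end-line delta, and statement flag into one word.
uint32_t packLine(uint32_t Start, uint32_t EndDelta, bool Statement) {
  assert(Start <= MaxLineNumber && EndDelta <= LineNumberEndDeltaMask);
  uint32_t V = Start | (EndDelta << LineNumberStartBits);
  return Statement ? (V | IsStatement) : V;
}

uint32_t startLine(uint32_t V) { return V & MaxLineNumber; }
uint32_t endDelta(uint32_t V) {
  return (V >> LineNumberStartBits) & LineNumberEndDeltaMask;
}
bool isStatement(uint32_t V) { return (V & IsStatement) != 0; }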
@@ -1024,7 +1034,10 @@ enum { PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM = 0x60000000, PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001, PT_AMDGPU_HSA_LOAD_READONLY_AGENT = 0x60000002, - PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003 + PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003, + + // WebAssembly program header types. + PT_WEBASSEMBLY_FUNCTIONS = PT_LOPROC + 0, // Function definitions. }; // Segment flag bits. diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/WebAssembly.def b/contrib/llvm/include/llvm/Support/ELFRelocs/WebAssembly.def new file mode 100644 index 0000000..9a34349 --- /dev/null +++ b/contrib/llvm/include/llvm/Support/ELFRelocs/WebAssembly.def @@ -0,0 +1,8 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_WEBASSEMBLY_NONE, 0) +ELF_RELOC(R_WEBASSEMBLY_DATA, 1) +ELF_RELOC(R_WEBASSEMBLY_FUNCTION, 2) diff --git a/contrib/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm/include/llvm/Support/GenericDomTree.h index 8751f27..8bae582 100644 --- a/contrib/llvm/include/llvm/Support/GenericDomTree.h +++ b/contrib/llvm/include/llvm/Support/GenericDomTree.h @@ -724,25 +724,17 @@ public: if (!this->IsPostDominators) { // Initialize root NodeT *entry = TraitsTy::getEntryNode(&F); - this->Roots.push_back(entry); - this->IDoms[entry] = nullptr; - this->DomTreeNodes[entry] = nullptr; + addRoot(entry); Calculate<FT, NodeT *>(*this, F); } else { // Initialize the roots list for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F), E = TraitsTy::nodes_end(&F); - I != E; ++I) { + I != E; ++I) if (TraitsTy::child_begin(&*I) == TraitsTy::child_end(&*I)) addRoot(&*I); - // Prepopulate maps so that we don't get iterator invalidation issues - // later. - this->IDoms[&*I] = nullptr; - this->DomTreeNodes[&*I] = nullptr; - } - Calculate<FT, Inverse<NodeT *>>(*this, F); } } diff --git a/contrib/llvm/include/llvm/Support/MathExtras.h b/contrib/llvm/include/llvm/Support/MathExtras.h index 8111aee..408ae3c 100644 --- a/contrib/llvm/include/llvm/Support/MathExtras.h +++ b/contrib/llvm/include/llvm/Support/MathExtras.h @@ -717,6 +717,25 @@ SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { return Z; } +/// \brief Multiply two unsigned integers, X and Y, and add the unsigned +/// integer A to the product. Clamp the result to the maximum representable +/// value of T on overflow. ResultOverflowed indicates if the result is larger +/// than the maximum representable value of type T. +/// Note that this is purely a convenience function as there is no distinction +/// as to where overflow occurred in a 'fused' multiply-add for unsigned numbers. +template <typename T> +typename std::enable_if<std::is_unsigned<T>::value, T>::type +SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { + bool Dummy; + bool &Overflowed = ResultOverflowed ?
*ResultOverflowed : Dummy; + + T Product = SaturatingMultiply(X, Y, &Overflowed); + if (Overflowed) + return Product; + + return SaturatingAdd(A, Product, &Overflowed); +} + extern const float huge_valf; } // End llvm namespace diff --git a/contrib/llvm/include/llvm/Transforms/IPO.h b/contrib/llvm/include/llvm/Transforms/IPO.h index 0c374a0..78d2fad 100644 --- a/contrib/llvm/include/llvm/Transforms/IPO.h +++ b/contrib/llvm/include/llvm/Transforms/IPO.h @@ -183,12 +183,20 @@ ModulePass *createBlockExtractorPass(); ModulePass *createStripDeadPrototypesPass(); //===----------------------------------------------------------------------===// -/// createFunctionAttrsPass - This pass discovers functions that do not access -/// memory, or only read memory, and gives them the readnone/readonly attribute. -/// It also discovers function arguments that are not captured by the function -/// and marks them with the nocapture attribute. +/// createPostOrderFunctionAttrsPass - This pass walks SCCs of the call graph +/// in post-order to deduce and propagate function attributes. It can discover +/// functions that do not access memory, or only read memory, and give them the +/// readnone/readonly attribute. It also discovers function arguments that are +/// not captured by the function and marks them with the nocapture attribute. /// -Pass *createFunctionAttrsPass(); +Pass *createPostOrderFunctionAttrsPass(); + +//===----------------------------------------------------------------------===// +/// createReversePostOrderFunctionAttrsPass - This pass walks SCCs of the call +/// graph in RPO to deduce and propagate function attributes. Currently it +/// only handles synthesizing norecurse attributes. +/// +Pass *createReversePostOrderFunctionAttrsPass(); //===----------------------------------------------------------------------===// /// createMergeFunctionsPass - This pass discovers identical functions and diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h index 92a1d52..4f006f2 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -147,42 +147,12 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueMapTypeRemapper *TypeMapper = nullptr, ValueMaterializer *Materializer = nullptr); -/// A helper class used with CloneAndPruneIntoFromInst to change the default -/// behavior while instructions are being cloned. -class CloningDirector { -public: - /// This enumeration describes the way CloneAndPruneIntoFromInst should - /// proceed after the CloningDirector has examined an instruction. - enum CloningAction { - ///< Continue cloning the instruction (default behavior). - CloneInstruction, - ///< Skip this instruction but continue cloning the current basic block. - SkipInstruction, - ///< Skip this instruction and stop cloning the current basic block. - StopCloningBB, - ///< Don't clone the terminator but clone the current block's successors. - CloneSuccessors - }; - - virtual ~CloningDirector() {} - - /// Subclasses must override this function to customize cloning behavior. 
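Editorial aside: SaturatingMultiplyAdd in the MathExtras.h hunk above is what lets the SampleProf.h accumulators collapse to a single call plus one ternary. A standalone sketch of the same clamping semantics, specialized to uint64_t rather than templated as in the real header:

#include <cstdint>
#include <limits>

// Clamp to the maximum representable value on overflow, reporting
// whether clamping happened, in the spirit of the MathExtras helpers.
uint64_t saturatingAdd(uint64_t X, uint64_t Y, bool *Overflowed) {
  const uint64_t Max = std::numeric_limits<uint64_t>::max();
  *Overflowed = X > Max - Y;
  return *Overflowed ? Max : X + Y;
}

uint64_t saturatingMultiply(uint64_t X, uint64_t Y, bool *Overflowed) {
  const uint64_t Max = std::numeric_limits<uint64_t>::max();
  *Overflowed = Y != 0 && X > Max / Y;
  return *Overflowed ? Max : X * Y;
}

uint64_t saturatingMultiplyAdd(uint64_t X, uint64_t Y, uint64_t A,
                               bool *Overflowed) {
  uint64_t Product = saturatingMultiply(X, Y, Overflowed);
  if (*Overflowed)
    return Product; // already clamped; adding cannot help
  return saturatingAdd(A, Product, Overflowed);
}

Folding the overflow bookkeeping into one helper is exactly what turns each SampleProf accumulator into "one call, one ternary" in the hunks above.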
- virtual CloningAction handleInstruction(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - - virtual ValueMapTypeRemapper *getTypeRemapper() { return nullptr; } - virtual ValueMaterializer *getValueMaterializer() { return nullptr; } -}; - void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, - SmallVectorImpl<ReturnInst*> &Returns, - const char *NameSuffix = "", - ClonedCodeInfo *CodeInfo = nullptr, - CloningDirector *Director = nullptr); - + SmallVectorImpl<ReturnInst *> &Returns, + const char *NameSuffix = "", + ClonedCodeInfo *CodeInfo = nullptr); /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm/include/llvm/Transforms/Utils/Local.h index 81b376f..911c6f1 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/Local.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/Local.h @@ -42,6 +42,7 @@ class TargetLibraryInfo; class TargetTransformInfo; class DIBuilder; class DominatorTree; +class LazyValueInfo; template<typename T> class SmallVectorImpl; @@ -303,7 +304,7 @@ void removeUnwindEdge(BasicBlock *BB); /// \brief Remove all blocks that can not be reached from the function's entry. /// /// Returns true if any basic block was removed. -bool removeUnreachableBlocks(Function &F); +bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr); /// \brief Combine the metadata of two instructions so that K can replace J /// diff --git a/contrib/llvm/include/llvm/module.modulemap b/contrib/llvm/include/llvm/module.modulemap index 0adce0c..d74ada6 100644 --- a/contrib/llvm/include/llvm/module.modulemap +++ b/contrib/llvm/include/llvm/module.modulemap @@ -207,6 +207,7 @@ module LLVM_Utils { textual header "Support/ELFRelocs/Sparc.def" textual header "Support/ELFRelocs/SystemZ.def" textual header "Support/ELFRelocs/x86_64.def" + textual header "Support/ELFRelocs/WebAssembly.def" } // This part of the module is usable from both C and C++ code. diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 85404d8..c3d2803 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -586,8 +586,13 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) { return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min); } -ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, - unsigned ArgIdx) { +/// Returns true if this is a writeonly (i.e Mod only) parameter. Currently, +/// we don't have a writeonly attribute, so this only knows about builtin +/// intrinsics and target library functions. We could consider adding a +/// writeonly attribute in the future and moving all of these facts to either +/// Intrinsics.td or InferFunctionAttr.cpp +static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx, + const TargetLibraryInfo &TLI) { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) switch (II->getIntrinsicID()) { default: @@ -597,9 +602,9 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, case Intrinsic::memmove: // We don't currently have a writeonly attribute. 
All other properties // of these intrinsics are nicely described via attributes in - // Intrinsics.td and handled generically below. + // Intrinsics.td and handled generically. if (ArgIdx == 0) - return MRI_Mod; + return true; } // We can bound the aliasing properties of memset_pattern16 just as we can @@ -609,7 +614,22 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, // handled via InferFunctionAttr. if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI)) if (ArgIdx == 0) - return MRI_Mod; + return true; + + // TODO: memset_pattern4, memset_pattern8 + // TODO: _chk variants + // TODO: strcmp, strcpy + + return false; +} + +ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, + unsigned ArgIdx) { + + // Emulate the missing writeonly attribute by checking for known builtin + // intrinsics and target library functions. + if (isWriteOnlyParam(CS, ArgIdx, TLI)) + return MRI_Mod; if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly)) return MRI_Ref; diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp index 07b389a..6dd1d0a 100644 --- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -612,9 +612,10 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override { Out << Banner; for (CallGraphNode *CGN : SCC) { - if (CGN->getFunction()) - CGN->getFunction()->print(Out); - else + if (CGN->getFunction()) { + if (isFunctionInPrintList(CGN->getFunction()->getName())) + CGN->getFunction()->print(Out); + } else Out << "\nPrinting <null> Function\n"; } return false; diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp index 249f395..1babb82 100644 --- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp @@ -358,21 +358,6 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) { if (Writers) Writers->insert(CS->getParent()->getParent()); - } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) { - Function *ParentF = CS->getParent()->getParent(); - // A nocapture argument may be read from or written to, but does not - // escape unless the call can somehow recurse. - // - // nocapture "indicates that the callee does not make any copies of - // the pointer that outlive itself". Therefore if we directly or - // indirectly recurse, we must treat the pointer as escaping. - if (FunctionToSCCMap[ParentF] == - FunctionToSCCMap[CS.getCalledFunction()]) - return true; - if (Readers) - Readers->insert(ParentF); - if (Writers) - Writers->insert(ParentF); } else { return true; // Argument of an unknown call. } diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index b89ff26..6dfe625 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -70,7 +70,7 @@ static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned); -/// getFalse - For a boolean type, or a vector of boolean type, return false, or +/// For a boolean type, or a vector of boolean type, return false, or /// a vector with every element false, as appropriate for the type. 
static Constant *getFalse(Type *Ty) { assert(Ty->getScalarType()->isIntegerTy(1) && @@ -78,7 +78,7 @@ static Constant *getFalse(Type *Ty) { return Constant::getNullValue(Ty); } -/// getTrue - For a boolean type, or a vector of boolean type, return true, or +/// For a boolean type, or a vector of boolean type, return true, or /// a vector with every element true, as appropriate for the type. static Constant *getTrue(Type *Ty) { assert(Ty->getScalarType()->isIntegerTy(1) && @@ -100,7 +100,7 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, CRHS == LHS; } -/// ValueDominatesPHI - Does the given value dominate the specified phi node? +/// Does the given value dominate the specified phi node? static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast<Instruction>(V); if (!I) @@ -131,8 +131,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { return false; } -/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning -/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is +/// Simplify "A op (B op' C)" by distributing op over op', turning it into +/// "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is /// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS. /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. @@ -193,8 +193,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, return nullptr; } -/// SimplifyAssociativeBinOp - Generic simplifications for associative binary -/// operations. Returns the simpler value, or null if none was found. +/// Generic simplifications for associative binary operations. +/// Returns the simpler value, or null if none was found. static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc; @@ -290,10 +290,10 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, return nullptr; } -/// ThreadBinOpOverSelect - In the case of a binary operation with a select -/// instruction as an operand, try to simplify the binop by seeing whether -/// evaluating it on both branches of the select results in the same value. -/// Returns the common value if so, otherwise returns null. +/// In the case of a binary operation with a select instruction as an operand, +/// try to simplify the binop by seeing whether evaluating it on both branches +/// of the select results in the same value. Returns the common value if so, +/// otherwise returns null. static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. @@ -362,10 +362,9 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, return nullptr; } -/// ThreadCmpOverSelect - In the case of a comparison with a select instruction, -/// try to simplify the comparison by seeing whether both branches of the select -/// result in the same value. Returns the common value if so, otherwise returns -/// null. +/// In the case of a comparison with a select instruction, try to simplify the +/// comparison by seeing whether both branches of the select result in the same +/// value. Returns the common value if so, otherwise returns null. 
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { @@ -444,10 +443,10 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, return nullptr; } -/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that -/// is a PHI instruction, try to simplify the binop by seeing whether evaluating -/// it on the incoming phi values yields the same result for every value. If so -/// returns the common value, otherwise returns null. +/// In the case of a binary operation with an operand that is a PHI instruction, +/// try to simplify the binop by seeing whether evaluating it on the incoming +/// phi values yields the same result for every value. If so returns the common +/// value, otherwise returns null. static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. @@ -486,10 +485,10 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, return CommonValue; } -/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try -/// try to simplify the comparison by seeing whether comparing with all of the -/// incoming phi values yields the same result every time. If so returns the -/// common result, otherwise returns null. +/// In the case of a comparison with a PHI instruction, try to simplify the +/// comparison by seeing whether comparing with all of the incoming phi values +/// yields the same result every time. If so returns the common result, +/// otherwise returns null. static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. @@ -524,8 +523,8 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, return CommonValue; } -/// SimplifyAddInst - Given operands for an Add, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an Add, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { @@ -656,8 +655,8 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, return ConstantExpr::getSub(LHSOffset, RHSOffset); } -/// SimplifySubInst - Given operands for a Sub, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a Sub, see if we can fold the result. +/// If not, this returns null. static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) @@ -889,8 +888,8 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, return nullptr; } -/// SimplifyMulInst - Given operands for a Mul, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a Mul, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { @@ -989,8 +988,8 @@ Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can -/// fold the result. If not, this returns null. 
+/// Given operands for an SDiv or UDiv, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { @@ -1075,8 +1074,8 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, return nullptr; } -/// SimplifySDivInst - Given operands for an SDiv, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an SDiv, see if we can fold the result. +/// If not, this returns null. static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) @@ -1093,8 +1092,8 @@ Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -/// SimplifyUDivInst - Given operands for a UDiv, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a UDiv, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) @@ -1154,8 +1153,8 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, RecursionLimit); } -/// SimplifyRem - Given operands for an SRem or URem, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an SRem or URem, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { @@ -1215,8 +1214,8 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, return nullptr; } -/// SimplifySRemInst - Given operands for an SRem, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an SRem, see if we can fold the result. +/// If not, this returns null. static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) @@ -1233,8 +1232,8 @@ Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -/// SimplifyURemInst - Given operands for a URem, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a URem, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) @@ -1279,7 +1278,7 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, RecursionLimit); } -/// isUndefShift - Returns true if a shift by \c Amount always yields undef. +/// Returns true if a shift by \c Amount always yields undef. static bool isUndefShift(Value *Amount) { Constant *C = dyn_cast<Constant>(Amount); if (!C) @@ -1306,8 +1305,8 @@ static bool isUndefShift(Value *Amount) { return false; } -/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an Shl, LShr or AShr, see if we can fold the result. +/// If not, this returns null. 
static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { @@ -1375,8 +1374,8 @@ static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1, return nullptr; } -/// SimplifyShlInst - Given operands for an Shl, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an Shl, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) @@ -1402,8 +1401,8 @@ Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, RecursionLimit); } -/// SimplifyLShrInst - Given operands for an LShr, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an LShr, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q, @@ -1427,8 +1426,8 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, RecursionLimit); } -/// SimplifyAShrInst - Given operands for an AShr, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an AShr, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q, @@ -1502,8 +1501,8 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, return nullptr; } -// Simplify (and (icmp ...) (icmp ...)) to true when we can tell that the range -// of possible values cannot be satisfied. +/// Simplify (and (icmp ...) (icmp ...)) to true when we can tell that the range +/// of possible values cannot be satisfied. static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { ICmpInst::Predicate Pred0, Pred1; ConstantInt *CI1, *CI2; @@ -1554,8 +1553,8 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return nullptr; } -/// SimplifyAndInst - Given operands for an And, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an And, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { @@ -1661,8 +1660,8 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union -// contains all possible values. +/// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union +/// contains all possible values. static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { ICmpInst::Predicate Pred0, Pred1; ConstantInt *CI1, *CI2; @@ -1713,8 +1712,8 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return nullptr; } -/// SimplifyOrInst - Given operands for an Or, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an Or, see if we can fold the result. +/// If not, this returns null. 
static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { @@ -1849,8 +1848,8 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -/// SimplifyXorInst - Given operands for a Xor, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a Xor, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { @@ -1910,9 +1909,9 @@ static Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } -/// ExtractEquivalentCondition - Rummage around inside V looking for something -/// equivalent to the comparison "LHS Pred RHS". Return such a value if found, -/// otherwise return null. Helper function for analyzing max/min idioms. +/// Rummage around inside V looking for something equivalent to the comparison +/// "LHS Pred RHS". Return such a value if found, otherwise return null. +/// Helper function for analyzing max/min idioms. static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { SelectInst *SI = dyn_cast<SelectInst>(V); @@ -2100,21 +2099,17 @@ static Constant *computePointerICmp(const DataLayout &DL, // that might be resolved lazily to symbols in another dynamically-loaded // library (and, thus, could be malloc'ed by the implementation). auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) { - return std::all_of(Objects.begin(), Objects.end(), - [](Value *V){ - if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) - return AI->getParent() && AI->getParent()->getParent() && - AI->isStaticAlloca(); - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - return (GV->hasLocalLinkage() || - GV->hasHiddenVisibility() || - GV->hasProtectedVisibility() || - GV->hasUnnamedAddr()) && - !GV->isThreadLocal(); - if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValAttr(); - return false; - }); + return std::all_of(Objects.begin(), Objects.end(), [](Value *V) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) + return AI->getParent() && AI->getFunction() && AI->isStaticAlloca(); + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() || + GV->hasProtectedVisibility() || GV->hasUnnamedAddr()) && + !GV->isThreadLocal(); + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + return false; + }); }; if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) || @@ -2127,8 +2122,8 @@ static Constant *computePointerICmp(const DataLayout &DL, return nullptr; } -/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an ICmpInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; @@ -3102,8 +3097,8 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, RecursionLimit); } -/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an FCmpInst, see if we can fold the result. +/// If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const Query &Q, unsigned MaxRecurse) { @@ -3227,8 +3222,7 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is -/// replaced with RepOp. +/// See if V simplifies when its operand Op is replaced with RepOp. static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, const Query &Q, unsigned MaxRecurse) { @@ -3311,8 +3305,8 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return nullptr; } -/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold -/// the result. If not, this returns null. +/// Given operands for a SelectInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, const Query &Q, unsigned MaxRecurse) { @@ -3449,8 +3443,8 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can -/// fold the result. If not, this returns null. +/// Given operands for an GetElementPtrInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, const Query &Q, unsigned) { // The type of the GEP pointer operand. @@ -3542,8 +3536,8 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL, Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we -/// can fold the result. If not, this returns null. +/// Given operands for an InsertValueInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const Query &Q, unsigned) { @@ -3579,8 +3573,8 @@ Value *llvm::SimplifyInsertValueInst( RecursionLimit); } -/// SimplifyExtractValueInst - Given operands for an ExtractValueInst, see if we -/// can fold the result. If not, this returns null. +/// Given operands for an ExtractValueInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, const Query &, unsigned) { if (auto *CAgg = dyn_cast<Constant>(Agg)) @@ -3614,8 +3608,8 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, RecursionLimit); } -/// SimplifyExtractElementInst - Given operands for an ExtractElementInst, see if we -/// can fold the result. If not, this returns null. +/// Given operands for an ExtractElementInst, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, unsigned) { if (auto *CVec = dyn_cast<Constant>(Vec)) { @@ -3646,7 +3640,7 @@ Value *llvm::SimplifyExtractElementInst( RecursionLimit); } -/// SimplifyPHINode - See if we can fold the given phi. If not, returns null. +/// See if we can fold the given phi. If not, returns null. static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. 
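Editorial aside: the threading helpers whose comments are modernized above all share one shape: evaluate the operation on each arm of a select (or each incoming value of a phi) and keep the result only if every arm simplifies to the same value. A toy integer-valued model of that shape, not the real LLVM API:

#include <functional>
#include <optional>

// Toy ThreadBinOpOverSelect: for "select(C, T, F) op RHS", try the op on
// both arms; if both fold to the same value, the select is irrelevant
// and that value is the simplification.
std::optional<int>
threadOverSelect(const std::function<std::optional<int>(int, int)> &Simplify,
                 int TrueArm, int FalseArm, int RHS) {
  auto T = Simplify(TrueArm, RHS);
  auto F = Simplify(FalseArm, RHS);
  if (T && F && *T == *F)
    return T; // both branches agree on a value
  return std::nullopt; // no common value; give up
}

The phi variants in the hunks above generalize this from two arms to all incoming values, bailing out early once the recursion limit is hit.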
@@ -3696,8 +3690,8 @@ Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL, //=== Helper functions for higher up the class hierarchy. -/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a BinaryOperator, see if we can fold the result. +/// If not, this returns null. static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { switch (Opcode) { @@ -3763,8 +3757,8 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, } } -/// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can -/// fold the result. If not, this returns null. +/// Given operands for a BinaryOperator, see if we can fold the result. +/// If not, this returns null. /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, @@ -3799,8 +3793,7 @@ Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, RecursionLimit); } -/// SimplifyCmpInst - Given operands for a CmpInst, see if we can -/// fold the result. +/// Given operands for a CmpInst, see if we can fold the result. static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) @@ -3938,8 +3931,8 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } -/// SimplifyInstruction - See if we can compute a simplified version of this -/// instruction. If not, this returns null. +/// See if we can compute a simplified version of this instruction. +/// If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp index d7896ad..8bcdcb8 100644 --- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -845,6 +845,7 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, if (Lp != AR->getLoop()) { DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << *Ptr << " SCEV: " << *PtrScev << "\n"); + return 0; } // The address calculation must not wrap. Otherwise, a dependence could be diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index 9ab9eea..0c725fc 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -637,8 +637,10 @@ LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) { analyze(DomTree); } -void LoopInfo::updateUnloop(Loop *Unloop) { - Unloop->markUnlooped(); +void LoopInfo::markAsRemoved(Loop *Unloop) { + assert(!Unloop->isInvalid() && "Loop has already been removed"); + Unloop->invalidate(); + RemovedLoops.push_back(Unloop); // First handle the special case of no parent loop to simplify the algorithm. 
if (!Unloop->getParentLoop()) { diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index dc42473..8163231 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -42,7 +42,11 @@ public: } bool runOnLoop(Loop *L, LPPassManager &) override { - P.run(*L); + auto BBI = find_if(L->blocks().begin(), L->blocks().end(), + [](BasicBlock *BB) { return BB; }); + if (BBI != L->blocks().end() && + isFunctionInPrintList((*BBI)->getParent()->getName())) + P.run(*L); return false; } }; @@ -174,8 +178,9 @@ bool LPPassManager::runOnFunction(Function &F) { // Walk Loops while (!LQ.empty()) { - + bool LoopWasDeleted = false; CurrentLoop = LQ.back(); + // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); @@ -192,15 +197,15 @@ bool LPPassManager::runOnFunction(Function &F) { Changed |= P->runOnLoop(CurrentLoop, *this); } + LoopWasDeleted = CurrentLoop->isInvalid(); if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, - CurrentLoop->isUnloop() - ? "<deleted>" - : CurrentLoop->getHeader()->getName()); + LoopWasDeleted ? "<deleted>" + : CurrentLoop->getHeader()->getName()); dumpPreservedSet(P); - if (CurrentLoop->isUnloop()) { + if (LoopWasDeleted) { // Notify passes that the loop is being deleted. deleteSimpleAnalysisLoop(CurrentLoop); } else { @@ -222,12 +227,11 @@ bool LPPassManager::runOnFunction(Function &F) { removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); - removeDeadPasses(P, CurrentLoop->isUnloop() - ? "<deleted>" - : CurrentLoop->getHeader()->getName(), + removeDeadPasses(P, LoopWasDeleted ? "<deleted>" + : CurrentLoop->getHeader()->getName(), ON_LOOP_MSG); - if (CurrentLoop->isUnloop()) + if (LoopWasDeleted) // Do not run other passes on this loop. break; } @@ -235,12 +239,11 @@ bool LPPassManager::runOnFunction(Function &F) { // If the loop was deleted, release all the loop passes. This frees up // some memory, and avoids trouble with the pass manager trying to call // verifyAnalysis on them. - if (CurrentLoop->isUnloop()) { + if (LoopWasDeleted) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); freePass(P, "<deleted>", ON_LOOP_MSG); } - delete CurrentLoop; } // Pop the loop from queue after running all passes. diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp index 029997a..486f3a5 100644 --- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -26,7 +26,7 @@ // ... = load %ptr2, !alias.scope !{ !scope1, !scope2 }, !noalias !{ !scope1 } // // When evaluating an aliasing query, if one of the instructions is associated -// has a set of noalias scopes in some domain that is superset of the alias +// has a set of noalias scopes in some domain that is a superset of the alias // scopes in that domain of some other instruction, then the two memory // accesses are assumed not to alias. // diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 805f3ef..9f92391 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -70,7 +70,7 @@ // A a; // } B; // -// For an acess to B.a.s, we attach !5 (a path tag node) to the load/store +// For an access to B.a.s, we attach !5 (a path tag node) to the load/store // instruction. 
The base type is !4 (struct B), the access type is !2 (scalar // type short) and the offset is 4. // diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index abc57ed..a83e207 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -2556,6 +2556,9 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) { switch (I->getOpcode()) { default: break; + // Unsigned integers are always nonnegative. + case Instruction::UIToFP: + return true; case Instruction::FMul: // x*x is always non-negative or a NaN. if (I->getOperand(0) == I->getOperand(1)) @@ -2566,6 +2569,9 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) { case Instruction::FRem: return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) && CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1); + case Instruction::Select: + return CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1) && + CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1); case Instruction::FPExt: case Instruction::FPTrunc: // Widening/narrowing never change sign. @@ -2574,6 +2580,12 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) switch (II->getIntrinsicID()) { default: break; + case Intrinsic::maxnum: + return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) || + CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1); + case Intrinsic::minnum: + return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) && + CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1); case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::fabs: diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index c7606fd..2ad4b32 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2654,8 +2654,6 @@ std::error_code BitcodeReader::parseConstants() { return error("Invalid record"); Type *EltTy = cast<SequentialType>(CurTy)->getElementType(); - unsigned Size = Record.size(); - if (EltTy->isIntegerTy(8)) { SmallVector<uint8_t, 16> Elts(Record.begin(), Record.end()); if (isa<VectorType>(CurTy)) @@ -2680,21 +2678,24 @@ std::error_code BitcodeReader::parseConstants() { V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); + } else if (EltTy->isHalfTy()) { + SmallVector<uint16_t, 16> Elts(Record.begin(), Record.end()); + if (isa<VectorType>(CurTy)) + V = ConstantDataVector::getFP(Context, Elts); + else + V = ConstantDataArray::getFP(Context, Elts); } else if (EltTy->isFloatTy()) { - SmallVector<float, 16> Elts(Size); - std::transform(Record.begin(), Record.end(), Elts.begin(), BitsToFloat); + SmallVector<uint32_t, 16> Elts(Record.begin(), Record.end()); if (isa<VectorType>(CurTy)) - V = ConstantDataVector::get(Context, Elts); + V = ConstantDataVector::getFP(Context, Elts); else - V = ConstantDataArray::get(Context, Elts); + V = ConstantDataArray::getFP(Context, Elts); } else if (EltTy->isDoubleTy()) { - SmallVector<double, 16> Elts(Size); - std::transform(Record.begin(), Record.end(), Elts.begin(), - BitsToDouble); + SmallVector<uint64_t, 16> Elts(Record.begin(), Record.end()); if (isa<VectorType>(CurTy)) - V = ConstantDataVector::get(Context, Elts); + V = ConstantDataVector::getFP(Context, Elts); else - V = ConstantDataArray::get(Context, Elts); + V = ConstantDataArray::getFP(Context, Elts); 
} else { return error("Invalid type for value"); } diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index a1f8786..a899a0c 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1630,19 +1630,10 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, if (isa<IntegerType>(EltTy)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) Record.push_back(CDS->getElementAsInteger(i)); - } else if (EltTy->isFloatTy()) { - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { float F; uint32_t I; }; - F = CDS->getElementAsFloat(i); - Record.push_back(I); - } } else { - assert(EltTy->isDoubleTy() && "Unknown ConstantData element type"); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { double F; uint64_t I; }; - F = CDS->getElementAsDouble(i); - Record.push_back(I); - } + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) + Record.push_back( + CDS->getElementAsAPFloat(i).bitcastToAPInt().getLimitedValue()); } } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || isa<ConstantVector>(C)) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index be7eafb..5f67d3d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -192,22 +192,26 @@ bool AsmPrinter::doInitialization(Module &M) { // use the directive, where it would need the same conditionalization // anyway. Triple TT(getTargetTriple()); - if (TT.isOSDarwin()) { + // If there is a version specified, Major will be non-zero. + if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) { unsigned Major, Minor, Update; - TT.getOSVersion(Major, Minor, Update); - // If there is a version specified, Major will be non-zero. - if (Major) { - MCVersionMinType VersionType; - if (TT.isWatchOS()) - VersionType = MCVM_WatchOSVersionMin; - else if (TT.isTvOS()) - VersionType = MCVM_TvOSVersionMin; - else if (TT.isMacOSX()) - VersionType = MCVM_OSXVersionMin; - else - VersionType = MCVM_IOSVersionMin; - OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update); + MCVersionMinType VersionType; + if (TT.isWatchOS()) { + VersionType = MCVM_WatchOSVersionMin; + TT.getWatchOSVersion(Major, Minor, Update); + } else if (TT.isTvOS()) { + VersionType = MCVM_TvOSVersionMin; + TT.getiOSVersion(Major, Minor, Update); + } else if (TT.isMacOSX()) { + VersionType = MCVM_OSXVersionMin; + if (!TT.getMacOSXVersion(Major, Minor, Update)) + Major = 0; + } else { + VersionType = MCVM_IOSVersionMin; + TT.getiOSVersion(Major, Minor, Update); } + if (Major != 0) + OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update); } // Allow the target to emit any magic that it wants at the start of the file. 
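The writer change above replaces per-type union punning with a single path that records each element's raw IEEE-754 bit pattern. A minimal standalone equivalent of that bit extraction, using memcpy (the well-defined way to do what the removed unions did); APFloat::bitcastToAPInt generalizes the same idea to half floats. The helper names here are illustrative:

    #include <cstdint>
    #include <cstring>

    // Reinterpret the bytes of an IEEE-754 value as an unsigned integer
    // of the same width, without undefined type punning.
    static uint32_t floatBits(float F) {
      uint32_t I;
      std::memcpy(&I, &F, sizeof I);
      return I;
    }

    static uint64_t doubleBits(double D) {
      uint64_t I;
      std::memcpy(&I, &D, sizeof I);
      return I;
    }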
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index bf794f7..7b0cdbd 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -32,6 +32,39 @@ using namespace llvm; //===----------------------------------------------------------------------===// +// EmittingAsmStreamer Implementation +//===----------------------------------------------------------------------===// +unsigned EmittingAsmStreamer::emitULEB128(uint64_t Value, const char *Desc, + unsigned PadTo) { + AP->EmitULEB128(Value, Desc, PadTo); + return 0; +} + +unsigned EmittingAsmStreamer::emitInt8(unsigned char Value) { + AP->EmitInt8(Value); + return 0; +} + +unsigned EmittingAsmStreamer::emitBytes(StringRef Data) { + AP->OutStreamer->EmitBytes(Data); + return 0; +} + +//===----------------------------------------------------------------------===// +// SizeReporterAsmStreamer Implementation +//===----------------------------------------------------------------------===// +unsigned SizeReporterAsmStreamer::emitULEB128(uint64_t Value, const char *Desc, + unsigned PadTo) { + return getULEB128Size(Value); +} + +unsigned SizeReporterAsmStreamer::emitInt8(unsigned char Value) { return 1; } + +unsigned SizeReporterAsmStreamer::emitBytes(StringRef Data) { + return Data.size(); +} + +//===----------------------------------------------------------------------===// // DIEAbbrevData Implementation //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1f0c06f..a4fb07e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -561,6 +561,8 @@ void DwarfDebug::finalizeModuleInfo() { // Collect info for variables that were optimized out. collectDeadVariables(); + unsigned MacroOffset = 0; + std::unique_ptr<AsmStreamerBase> AS(new SizeReporterAsmStreamer(Asm)); // Handle anything that needs to be done on a per-unit basis after // all other generation. for (const auto &P : CUMap) { @@ -613,6 +615,15 @@ void DwarfDebug::finalizeModuleInfo() { U.setBaseAddress(TheCU.getRanges().front().getStart()); U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } + + auto *CUNode = cast<DICompileUnit>(P.first); + if (CUNode->getMacros()) { + // Compile Unit has macros, emit "DW_AT_macro_info" attribute. + U.addUInt(U.getUnitDie(), dwarf::DW_AT_macro_info, + dwarf::DW_FORM_sec_offset, MacroOffset); + // Update macro section offset + MacroOffset += handleMacroNodes(AS.get(), CUNode->getMacros(), U); + } } // Compute DIE offsets and sizes. @@ -656,6 +667,9 @@ void DwarfDebug::endModule() { // Emit info into a debug ranges section. emitDebugRanges(); + // Emit info into a debug macinfo section. 
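EmittingAsmStreamer and SizeReporterAsmStreamer are two implementations of one interface: the first writes through the AsmPrinter, the second only reports how many bytes each call would produce. Running the size-reporting one first is what lets DwarfDebug compute macro section offsets before anything is emitted. A minimal sketch of the pattern, with hypothetical names rather than the LLVM classes:

    #include <string>

    struct StreamerBase {
      virtual ~StreamerBase() = default;
      virtual unsigned emitInt8(unsigned char Value) = 0;
      virtual unsigned emitBytes(const std::string &Data) = 0;
    };

    // Measures instead of writing; each method returns the byte count the
    // real emitter would have produced for the same call.
    struct SizeReporter : StreamerBase {
      unsigned emitInt8(unsigned char) override { return 1; }
      unsigned emitBytes(const std::string &Data) override {
        return static_cast<unsigned>(Data.size());
      }
    };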
+ emitDebugMacinfo(); + if (useSplitDwarf()) { emitDebugStrDWO(); emitDebugInfoDWO(); @@ -1833,6 +1847,70 @@ void DwarfDebug::emitDebugRanges() { } } +unsigned DwarfDebug::handleMacroNodes(AsmStreamerBase *AS, + DIMacroNodeArray Nodes, + DwarfCompileUnit &U) { + unsigned Size = 0; + for (auto *MN : Nodes) { + if (auto *M = dyn_cast<DIMacro>(MN)) + Size += emitMacro(AS, *M); + else if (auto *F = dyn_cast<DIMacroFile>(MN)) + Size += emitMacroFile(AS, *F, U); + else + llvm_unreachable("Unexpected DI type!"); + } + return Size; +} + +unsigned DwarfDebug::emitMacro(AsmStreamerBase *AS, DIMacro &M) { + int Size = 0; + Size += AS->emitULEB128(M.getMacinfoType()); + Size += AS->emitULEB128(M.getLine()); + StringRef Name = M.getName(); + StringRef Value = M.getValue(); + Size += AS->emitBytes(Name); + if (!Value.empty()) { + // There should be one space between macro name and macro value. + Size += AS->emitInt8(' '); + Size += AS->emitBytes(Value); + } + Size += AS->emitInt8('\0'); + return Size; +} + +unsigned DwarfDebug::emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F, + DwarfCompileUnit &U) { + int Size = 0; + assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file); + Size += AS->emitULEB128(dwarf::DW_MACINFO_start_file); + Size += AS->emitULEB128(F.getLine()); + DIFile *File = F.getFile(); + unsigned FID = + U.getOrCreateSourceID(File->getFilename(), File->getDirectory()); + Size += AS->emitULEB128(FID); + Size += handleMacroNodes(AS, F.getElements(), U); + Size += AS->emitULEB128(dwarf::DW_MACINFO_end_file); + return Size; +} + +// Emit visible names into a debug macinfo section. +void DwarfDebug::emitDebugMacinfo() { + if (MCSection *Macinfo = Asm->getObjFileLowering().getDwarfMacinfoSection()) { + // Start the dwarf macinfo section. + Asm->OutStreamer->SwitchSection(Macinfo); + } + std::unique_ptr<AsmStreamerBase> AS(new EmittingAsmStreamer(Asm)); + for (const auto &P : CUMap) { + auto &TheCU = *P.second; + auto *SkCU = TheCU.getSkeleton(); + DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; + auto *CUNode = cast<DICompileUnit>(P.first); + handleMacroNodes(AS.get(), CUNode->getMacros(), U); + } + Asm->OutStreamer->AddComment("End Of Macro List Mark"); + Asm->EmitInt8(0); +} + // DWARF5 Experimental Separate Dwarf emitters. void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 4c613a9..460c186 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -400,18 +400,26 @@ class DwarfDebug : public AsmPrinterHandler { /// Emit visible names into a debug str section. void emitDebugStr(); - /// Emit visible names into a debug loc section. + /// Emit variable locations into a debug loc section. void emitDebugLoc(); - /// Emit visible names into a debug loc dwo section. + /// Emit variable locations into a debug loc dwo section. void emitDebugLocDWO(); - /// Emit visible names into a debug aranges section. + /// Emit address ranges into a debug aranges section. void emitDebugARanges(); - /// Emit visible names into a debug ranges section. + /// Emit address ranges into a debug ranges section. void emitDebugRanges(); + /// Emit macros into a debug macinfo section. 
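The numeric fields in these macinfo records are ULEB128-encoded, which is why the size pass and the emit pass can agree byte-for-byte. A self-contained encoder showing the format; passing nullptr only measures, mirroring the two-streamer split:

    #include <cstdint>
    #include <vector>

    static unsigned encodeULEB128(uint64_t Value, std::vector<uint8_t> *Out) {
      unsigned Count = 0;
      do {
        uint8_t Byte = Value & 0x7f; // low 7 bits per byte
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // high bit set: more bytes follow
        if (Out)
          Out->push_back(Byte);
        ++Count;
      } while (Value != 0);
      return Count;
    }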
+ void emitDebugMacinfo(); + unsigned emitMacro(AsmStreamerBase *AS, DIMacro &M); + unsigned emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F, + DwarfCompileUnit &U); + unsigned handleMacroNodes(AsmStreamerBase *AS, DIMacroNodeArray Nodes, + DwarfCompileUnit &U); + /// DWARF 5 Experimental Split Dwarf Emitters /// Initialize common features of skeleton units. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index c2c0f84..1e2f55b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -82,13 +82,24 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, const MDNode *Scope = DL.getScope(); if (!Scope) return; + unsigned LineNumber = DL.getLine(); + // Skip this line if it is longer than the maximum we can record. + if (LineNumber > COFF::CVL_MaxLineNumber) + return; + + unsigned ColumnNumber = DL.getCol(); + // Truncate the column number if it is longer than the maximum we can record. + if (ColumnNumber > COFF::CVL_MaxColumnNumber) + ColumnNumber = 0; + StringRef Filename = getFullFilepath(Scope); // Skip this instruction if it has the same file:line as the previous one. assert(CurFn); if (!CurFn->Instrs.empty()) { const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()]; - if (LastInstr.Filename == Filename && LastInstr.LineNumber == DL.getLine()) + if (LastInstr.Filename == Filename && LastInstr.LineNumber == LineNumber && + LastInstr.ColumnNumber == ColumnNumber) return; } FileNameRegistry.add(Filename); @@ -96,7 +107,7 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol(); Asm->OutStreamer->EmitLabel(MCL); CurFn->Instrs.push_back(MCL); - InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine(), DL.getCol()); + InstrInfo[MCL] = InstrInfoTy(Filename, LineNumber, ColumnNumber); } WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) @@ -282,8 +293,9 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart]; ColSegI != ColSegEnd; ++ColSegI) { unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber; + assert(ColumnNumber <= COFF::CVL_MaxColumnNumber); Asm->EmitInt16(ColumnNumber); // Start column - Asm->EmitInt16(ColumnNumber); // End column + Asm->EmitInt16(0); // End column } Asm->OutStreamer->EmitLabel(FileSegmentEnd); }; @@ -320,7 +332,10 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { // The first PC with the given linenumber and the linenumber itself. 
EmitLabelDiff(*Asm->OutStreamer, Fn, Instr); - Asm->EmitInt32(InstrInfo[Instr].LineNumber); + uint32_t LineNumber = InstrInfo[Instr].LineNumber; + assert(LineNumber <= COFF::CVL_MaxLineNumber); + uint32_t LineData = LineNumber | COFF::CVL_IsStatement; + Asm->EmitInt32(LineData); } FinishPreviousChunk(); diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 604feed..df5cac5 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -744,18 +744,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } -static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) { - auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end(); - auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end(); - if ((E1 - I1) != (E2 - I2)) - return false; - for (; I1 != E1; ++I1, ++I2) { - if (**I1 != **I2) - return false; - } - return true; -} - static void removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, MachineBasicBlock &MBBCommon) { @@ -792,8 +780,7 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!"); if (MBBICommon->mayLoad() || MBBICommon->mayStore()) - if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) - MBBICommon->dropMemRefs(); + MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI)); ++MBBI; ++MBBICommon; diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 6fbdea8..03e5778 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1108,7 +1108,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, // <16 x i1> %mask, <16 x i32> %passthru) // to a chain of basic blocks, with loading element one-by-one if // the appropriate mask bit is set -// +// // %1 = bitcast i8* %addr to i32* // %2 = extractelement <16 x i1> %mask, i32 0 // %3 = icmp eq i1 %2, true @@ -1272,12 +1272,12 @@ static void ScalarizeMaskedLoad(CallInst *CI) { // %5 = getelementptr i32* %1, i32 0 // store i32 %4, i32* %5 // br label %else -// +// // else: ; preds = %0, %cond.store // %6 = extractelement <16 x i1> %mask, i32 1 // %7 = icmp eq i1 %6, true // br i1 %7, label %cond.store1, label %else2 -// +// // cond.store1: ; preds = %else // %8 = extractelement <16 x i32> %val, i32 1 // %9 = getelementptr i32* %1, i32 1 @@ -1377,24 +1377,24 @@ static void ScalarizeMaskedStore(CallInst *CI) { // <16 x i1> %Mask, <16 x i32> %Src) // to a chain of basic blocks, with loading element one-by-one if // the appropriate mask bit is set -// +// // % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind // % Mask0 = extractelement <16 x i1> %Mask, i32 0 // % ToLoad0 = icmp eq i1 % Mask0, true // br i1 % ToLoad0, label %cond.load, label %else -// +// // cond.load: // % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 // % Load0 = load i32, i32* % Ptr0, align 4 // % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 // br label %else -// +// // else: // %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] // % Mask1 = extractelement <16 x i1> %Mask, i32 1 // % ToLoad1 = icmp eq i1 % Mask1, true // br i1 % ToLoad1, label %cond.load1, label %else2 -// +// // cond.load1: // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 // % Load1 = load i32, i32* % Ptr1, align 4 @@ -1526,7 +1526,7 @@ static void ScalarizeMaskedGather(CallInst *CI) 
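The CodeView fixes above follow one policy: an unencodable line number means the location is skipped entirely, an unencodable column is truncated to 0 ("unknown"), and the emitted line word carries an is-statement flag in its top bit. A standalone sketch of that policy; the field widths and flag position below are illustrative assumptions (the real constants are COFF::CVL_MaxLineNumber, COFF::CVL_MaxColumnNumber, and COFF::CVL_IsStatement):

    #include <cstdint>

    constexpr uint32_t MaxLine = 0xFFFFFF;     // assumed 24-bit line field
    constexpr uint32_t MaxColumn = 0xFFFF;     // assumed 16-bit column field
    constexpr uint32_t IsStatement = 1u << 31; // assumed flag bit

    // Returns false when the location cannot be recorded at all; truncates
    // an oversized column rather than emitting a wrong one.
    static bool normalize(uint32_t &Line, uint32_t &Column) {
      if (Line > MaxLine)
        return false;
      if (Column > MaxColumn)
        Column = 0;
      return true;
    }

    static uint32_t packLineData(uint32_t Line) {
      return Line | IsStatement; // low bits: line, top bit: statement flag
    }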
{ // % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 // store i32 %Elt0, i32* % Ptr0, align 4 // br label %else -// +// // else: // % Mask1 = extractelement <16 x i1> % Mask, i32 1 // % ToStore1 = icmp eq i1 % Mask1, true diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp index 98d30b9..b9937e5 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -19,6 +19,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -30,7 +32,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include <deque> +#include <queue> #include <list> using namespace llvm; @@ -76,16 +78,13 @@ private: typedef std::list<VarLoc> VarLocList; typedef SmallDenseMap<const MachineBasicBlock *, VarLocList> VarLocInMBB; - bool OLChanged; // OutgoingLocs got changed for this bb. - bool MBBJoined; // The MBB was joined. - void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges); void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges); - void transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges, + bool transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs); - void transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs); + bool transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs); - void join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs); + bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs); bool ExtendRanges(MachineFunction &MF); @@ -225,24 +224,18 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI, } /// Terminate all open ranges at the end of the current basic block. -void LiveDebugValues::transferTerminatorInst(MachineInstr &MI, +bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs) { + bool Changed = false; const MachineBasicBlock *CurMBB = MI.getParent(); if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back()))) - return; + return false; if (OpenRanges.empty()) - return; + return false; - if (OutLocs.find(CurMBB) == OutLocs.end()) { - // Create space for new Outgoing locs entries. - VarLocList VLL; - OutLocs.insert(std::make_pair(CurMBB, std::move(VLL))); - } - auto OL = OutLocs.find(CurMBB); - assert(OL != OutLocs.end()); - VarLocList &VLL = OL->second; + VarLocList &VLL = OutLocs[CurMBB]; for (auto OR : OpenRanges) { // Copy OpenRanges to OutLocs, if not already present. @@ -251,28 +244,30 @@ void LiveDebugValues::transferTerminatorInst(MachineInstr &MI, if (std::find_if(VLL.begin(), VLL.end(), [&](const VarLoc &V) { return (OR == V); }) == VLL.end()) { VLL.push_back(std::move(OR)); - OLChanged = true; + Changed = true; } } OpenRanges.clear(); + return Changed; } /// This routine creates OpenRanges and OutLocs. 
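The OutLocs simplification above leans on a property LLVM's map types share with std::map: operator[] default-constructs a missing entry, so the explicit find/insert dance the old code performed is unnecessary. In standard C++ terms:

    #include <map>
    #include <vector>

    std::map<int, std::vector<int>> OutLocs;

    void recordLoc(int BB, int Loc) {
      // Creates an empty vector on first access to this key.
      std::vector<int> &VLL = OutLocs[BB];
      VLL.push_back(Loc);
    }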
-void LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges, +bool LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs) { + bool Changed = false; transferDebugValue(MI, OpenRanges); transferRegisterDef(MI, OpenRanges); - transferTerminatorInst(MI, OpenRanges, OutLocs); + Changed = transferTerminatorInst(MI, OpenRanges, OutLocs); + return Changed; } /// This routine joins the analysis results of all incoming edges in @MBB by /// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same /// source variable in all the predecessors of @MBB reside in the same location. -void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, +bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs) { DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); - - MBBJoined = false; + bool Changed = false; VarLocList InLocsT; // Temporary incoming locations. @@ -282,7 +277,7 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, auto OL = OutLocs.find(p); // Join is null in case of empty OutLocs from any of the pred. if (OL == OutLocs.end()) - return; + return false; // Just copy over the Out locs to incoming locs for the first predecessor. if (p == *MBB.pred_begin()) { @@ -292,27 +287,18 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, // Join with this predecessor. VarLocList &VLL = OL->second; - InLocsT.erase(std::remove_if(InLocsT.begin(), InLocsT.end(), - [&](VarLoc &ILT) { - return (std::find_if(VLL.begin(), VLL.end(), - [&](const VarLoc &V) { - return (ILT == V); - }) == VLL.end()); - }), - InLocsT.end()); + InLocsT.erase( + std::remove_if(InLocsT.begin(), InLocsT.end(), [&](VarLoc &ILT) { + return (std::find_if(VLL.begin(), VLL.end(), [&](const VarLoc &V) { + return (ILT == V); + }) == VLL.end()); + }), InLocsT.end()); } if (InLocsT.empty()) - return; + return false; - if (InLocs.find(&MBB) == InLocs.end()) { - // Create space for new Incoming locs entries. - VarLocList VLL; - InLocs.insert(std::make_pair(&MBB, std::move(VLL))); - } - auto IL = InLocs.find(&MBB); - assert(IL != InLocs.end()); - VarLocList &ILL = IL->second; + VarLocList &ILL = InLocs[&MBB]; // Insert DBG_VALUE instructions, if not already inserted. for (auto ILT : InLocsT) { @@ -331,12 +317,13 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); DEBUG(dbgs() << "Inserted: "; MI->dump();); ++NumInserted; - MBBJoined = true; // rerun transfer(). + Changed = true; VarLoc V(ILT.Var, MI); ILL.push_back(std::move(V)); } } + return Changed; } /// Calculate the liveness information for the given machine function and @@ -346,48 +333,72 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { DEBUG(dbgs() << "\nDebug Range Extension\n"); bool Changed = false; - OLChanged = MBBJoined = false; + bool OLChanged = false; + bool MBBJoined = false; VarLocList OpenRanges; // Ranges that are open until end of bb. VarLocInMBB OutLocs; // Ranges that exist beyond bb. VarLocInMBB InLocs; // Ranges that are incoming after joining. 
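The join above computes a set intersection across predecessors with the erase/remove_if idiom: after seeding from the first predecessor, each later predecessor's out-set filters the candidates down to the locations everyone agrees on. The same idiom in isolation, over plain ints:

    #include <algorithm>
    #include <vector>

    // Keep only the entries of InLocs that also appear in PredOut.
    void intersectInPlace(std::vector<int> &InLocs,
                          const std::vector<int> &PredOut) {
      InLocs.erase(std::remove_if(InLocs.begin(), InLocs.end(),
                                  [&](int Loc) {
                                    return std::find(PredOut.begin(),
                                                     PredOut.end(),
                                                     Loc) == PredOut.end();
                                  }),
                   InLocs.end());
    }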
- std::deque<MachineBasicBlock *> BBWorklist; - + DenseMap<unsigned int, MachineBasicBlock *> OrderToBB; + DenseMap<MachineBasicBlock *, unsigned int> BBToOrder; + std::priority_queue<unsigned int, std::vector<unsigned int>, + std::greater<unsigned int>> Worklist; + std::priority_queue<unsigned int, std::vector<unsigned int>, + std::greater<unsigned int>> Pending; // Initialize every mbb with OutLocs. for (auto &MBB : MF) for (auto &MI : MBB) transfer(MI, OpenRanges, OutLocs); DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs())); - // Construct a worklist of MBBs. - for (auto &MBB : MF) - BBWorklist.push_back(&MBB); - - // Perform join() and transfer() using the worklist until the ranges converge - // Ranges have converged when the worklist is empty. - while (!BBWorklist.empty()) { - MachineBasicBlock *MBB = BBWorklist.front(); - BBWorklist.pop_front(); - - join(*MBB, OutLocs, InLocs); + ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); + unsigned int RPONumber = 0; + for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) { + OrderToBB[RPONumber] = *RI; + BBToOrder[*RI] = RPONumber; + Worklist.push(RPONumber); + ++RPONumber; + } - if (MBBJoined) { - Changed = true; - for (auto &MI : *MBB) - transfer(MI, OpenRanges, OutLocs); - DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs())); - DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs())); - - if (OLChanged) { - OLChanged = false; - for (auto s : MBB->successors()) - if (std::find(BBWorklist.begin(), BBWorklist.end(), s) == - BBWorklist.end()) // add if not already present. - BBWorklist.push_back(s); + // This is a standard "union of predecessor outs" dataflow problem. + // To solve it, we perform join() and transfer() using the two worklist method + // until the ranges converge. + // Ranges have converged when both worklists are empty. + while (!Worklist.empty() || !Pending.empty()) { + // We track what is on the pending worklist to avoid inserting the same + // thing twice. We could avoid this with a custom priority queue, but this + // is probably not worth it. + SmallPtrSet<MachineBasicBlock *, 16> OnPending; + while (!Worklist.empty()) { + MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; + Worklist.pop(); + MBBJoined = join(*MBB, OutLocs, InLocs); + + if (MBBJoined) { + MBBJoined = false; + Changed = true; + for (auto &MI : *MBB) + OLChanged |= transfer(MI, OpenRanges, OutLocs); + DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs())); + DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs())); + + if (OLChanged) { + OLChanged = false; + for (auto s : MBB->successors()) + if (!OnPending.count(s)) { + OnPending.insert(s); + Pending.push(BBToOrder[s]); + } + } } } + Worklist.swap(Pending); + // At this point, pending must be empty, since it was just the empty + // worklist + assert(Pending.empty() && "Pending should be empty"); } + DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs())); DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs())); return Changed; diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index efad36f..bb34883 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -1328,15 +1328,15 @@ void LiveRangeUpdater::flush() { LR->verify(); } -unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { +unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) { // Create initial equivalence classes. 
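The rewrite replaces the ad-hoc deque with the classic two-worklist scheme: blocks are keyed by reverse-post-order number so joins see up-to-date predecessor state, and successors whose inputs changed are parked on a Pending queue that becomes the next pass's worklist. A self-contained skeleton of that driver, under the assumption that block indices already are RPO numbers:

    #include <functional>
    #include <queue>
    #include <set>
    #include <vector>

    using MinHeap = std::priority_queue<unsigned, std::vector<unsigned>,
                                        std::greater<unsigned>>;

    // JoinAndTransfer returns true when the block's out-state changed.
    void solveDataflow(const std::vector<std::vector<unsigned>> &Succs,
                       const std::function<bool(unsigned)> &JoinAndTransfer) {
      MinHeap Worklist, Pending;
      for (unsigned BB = 0, E = static_cast<unsigned>(Succs.size()); BB != E;
           ++BB)
        Worklist.push(BB);
      while (!Worklist.empty() || !Pending.empty()) {
        // Track what is already pending to avoid duplicate queue entries.
        std::set<unsigned> OnPending;
        while (!Worklist.empty()) {
          unsigned BB = Worklist.top();
          Worklist.pop();
          if (JoinAndTransfer(BB))
            for (unsigned Succ : Succs[BB])
              if (OnPending.insert(Succ).second)
                Pending.push(Succ);
        }
        Worklist.swap(Pending); // drained, so Pending drives the next pass
      }
    }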
EqClass.clear(); - EqClass.grow(LI->getNumValNums()); + EqClass.grow(LR.getNumValNums()); const VNInfo *used = nullptr, *unused = nullptr; // Determine connections. - for (const VNInfo *VNI : LI->valnos) { + for (const VNInfo *VNI : LR.valnos) { // Group all unused values into one class. if (VNI->isUnused()) { if (unused) @@ -1351,14 +1351,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Connect to values live out of predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) - if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI))) + if (const VNInfo *PVNI = LR.getVNInfoBefore(LIS.getMBBEndIdx(*PI))) EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. // FIXME: This could be coincidental. Should we really check for a tied // operand constraint? // Note that VNI->def may be a use slot for an early clobber def. - if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def)) + if (const VNInfo *UVNI = LR.getVNInfoBefore(VNI->def)) EqClass.join(VNI->id, UVNI->id); } } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index 9451d92..a506e05 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1446,7 +1446,7 @@ void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { void LiveIntervals::splitSeparateComponents(LiveInterval &LI, SmallVectorImpl<LiveInterval*> &SplitLIs) { ConnectedVNInfoEqClasses ConEQ(*this); - unsigned NumComp = ConEQ.Classify(&LI); + unsigned NumComp = ConEQ.Classify(LI); if (NumComp <= 1) return; DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 76099f2..85d544d 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1182,7 +1182,7 @@ MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) { /// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed /// as of just before "MI". -/// +/// /// Search is localised to a neighborhood of /// Neighborhood instructions before (searching for defs or kills) and N /// instructions after (searching just for defs) MI. 
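Classify works by joining value numbers into equivalence classes and counting the classes that remain. A minimal union-find with the same leader convention (the smallest member names the class), standing in for the llvm::IntEqClasses object behind EqClass; a sketch, not the LLVM implementation:

    #include <numeric>
    #include <utility>
    #include <vector>

    struct EqClasses {
      std::vector<unsigned> Parent;
      explicit EqClasses(unsigned N) : Parent(N) {
        std::iota(Parent.begin(), Parent.end(), 0u); // each node starts alone
      }
      unsigned findLeader(unsigned A) {
        while (Parent[A] != A)
          A = Parent[A] = Parent[Parent[A]]; // path halving
        return A;
      }
      unsigned join(unsigned A, unsigned B) {
        A = findLeader(A);
        B = findLeader(B);
        if (B < A)
          std::swap(A, B); // keep the smallest index as the leader
        return Parent[B] = A;
      }
    };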
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index 790f5ac..4f424ff 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -31,7 +31,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { const std::string Banner; MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { } - MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) + MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) : MachineFunctionPass(ID), OS(os), Banner(banner) {} const char *getPassName() const override { return "MachineFunction Printer"; } @@ -42,6 +42,8 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { } bool runOnMachineFunction(MachineFunction &MF) override { + if (!llvm::isFunctionInPrintList(MF.getName())) + return false; OS << "# " << Banner << ":\n"; MF.print(OS, getAnalysisIfAvailable<SlotIndexes>()); return false; diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 6b8eecc..6dca74d 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -866,14 +866,44 @@ void MachineInstr::addMemOperand(MachineFunction &MF, setMemRefs(NewMemRefs, NewMemRefs + NewNum); } +/// Check to see if the MMOs pointed to by the two MemRefs arrays are +/// identical. +static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) { + auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end(); + auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end(); + if ((E1 - I1) != (E2 - I2)) + return false; + for (; I1 != E1; ++I1, ++I2) { + if (**I1 != **I2) + return false; + } + return true; +} + std::pair<MachineInstr::mmo_iterator, unsigned> MachineInstr::mergeMemRefsWith(const MachineInstr& Other) { - // TODO: If we end up with too many memory operands, return the empty - // conservative set rather than failing asserts. + + // If either of the incoming memrefs are empty, we must be conservative and + // treat this as if we've exhausted our space for memrefs and dropped them. + if (memoperands_empty() || Other.memoperands_empty()) + return std::make_pair(nullptr, 0); + + // If both instructions have identical memrefs, we don't need to merge them. + // Since many instructions have a single memref, and we tend to merge things + // like pairs of loads from the same location, this catches a large number of + // cases in practice. + if (hasIdenticalMMOs(*this, Other)) + return std::make_pair(MemRefs, NumMemRefs); + // TODO: consider uniquing elements within the operand lists to reduce // space usage and fall back to conservative information less often. - size_t CombinedNumMemRefs = (memoperands_end() - memoperands_begin()) - + (Other.memoperands_end() - Other.memoperands_begin()); + size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs; + + // If we don't have enough room to store this many memrefs, be conservative + // and drop them. Otherwise, we'd fail asserts when trying to add them to + // the new instruction. 
+ if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs)) + return std::make_pair(nullptr, 0); MachineFunction *MF = getParent()->getParent(); mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs); diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index a8368e9..99a97d2 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -334,12 +334,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { // writes to all slots. if (MI->memoperands_empty()) return true; - for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), - oe = MI->memoperands_end(); o != oe; ++o) { - if (!(*o)->isStore() || !(*o)->getPseudoValue()) + for (const MachineMemOperand *MemOp : MI->memoperands()) { + if (!MemOp->isStore() || !MemOp->getPseudoValue()) continue; if (const FixedStackPseudoSourceValue *Value = - dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) { + dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) { if (Value->getFrameIndex() == FI) return true; } @@ -357,8 +356,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isFI()) { // Remember if the instruction stores to the frame index. int FI = MO.getIndex(); @@ -452,9 +450,7 @@ void MachineLICM::HoistRegionPostRA() { // Walk the entire region, count number of defs for each register, and // collect potential LICM candidates. const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *BB = Blocks[i]; - + for (MachineBasicBlock *BB : Blocks) { // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop. const MachineLoop *ML = MLI->getLoopFor(BB); @@ -469,19 +465,15 @@ void MachineLICM::HoistRegionPostRA() { } SpeculationState = SpeculateUnknown; - for (MachineBasicBlock::iterator - MII = BB->begin(), E = BB->end(); MII != E; ++MII) { - MachineInstr *MI = &*MII; - ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); - } + for (MachineInstr &MI : *BB) + ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); } // Gather the registers read / clobbered by the terminator. BitVector TermRegs(NumRegs); MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); if (TI != Preheader->end()) { - for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = TI->getOperand(i); + for (const MachineOperand &MO : TI->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -500,17 +492,16 @@ void MachineLICM::HoistRegionPostRA() { // 3. Make sure candidate def should not clobber // registers read by the terminator. Similarly its def should not be // clobbered by the terminator. 
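mergeMemRefsWith falls back to the conservative empty list for two reasons: an empty list on either side already means "unknown memory", and the combined count must survive a round-trip through the uint8_t field that stores it. The overflow test in isolation:

    #include <cstdint>

    // True iff the merged count still fits the 8-bit storage field; when it
    // does not, the caller drops to the conservative empty list instead of
    // tripping asserts later.
    static bool fitsInMemRefCount(unsigned NumA, unsigned NumB) {
      unsigned Combined = NumA + NumB;
      return Combined == uint8_t(Combined);
    }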
- for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { - if (Candidates[i].FI != INT_MIN && - StoredFIs.count(Candidates[i].FI)) + for (CandidateInfo &Candidate : Candidates) { + if (Candidate.FI != INT_MIN && + StoredFIs.count(Candidate.FI)) continue; - unsigned Def = Candidates[i].Def; + unsigned Def = Candidate.Def; if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { bool Safe = true; - MachineInstr *MI = Candidates[i].MI; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - const MachineOperand &MO = MI->getOperand(j); + MachineInstr *MI = Candidate.MI; + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isDef() || !MO.getReg()) continue; unsigned Reg = MO.getReg(); @@ -523,7 +514,7 @@ void MachineLICM::HoistRegionPostRA() { } } if (Safe) - HoistPostRA(MI, Candidates[i].Def); + HoistPostRA(MI, Candidate.Def); } } } @@ -532,15 +523,11 @@ void MachineLICM::HoistRegionPostRA() { /// sure it is not killed by any instructions in the loop. void MachineLICM::AddToLiveIns(unsigned Reg) { const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *BB = Blocks[i]; + for (MachineBasicBlock *BB : Blocks) { if (!BB->isLiveIn(Reg)) BB->addLiveIn(Reg); - for (MachineBasicBlock::iterator - MII = BB->begin(), E = BB->end(); MII != E; ++MII) { - MachineInstr *MI = &*MII; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineInstr &MI : *BB) { + for (MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) MO.setIsKill(false); @@ -582,8 +569,8 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { // Check loop exiting blocks. SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); - for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) - if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { + for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks) + if (!DT->dominates(BB, CurrentLoopExitingBlock)) { SpeculationState = SpeculateTrue; return false; } @@ -689,8 +676,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { InitRegPressure(Preheader); // Now perform LICM. - for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { - MachineDomTreeNode *Node = Scopes[i]; + for (MachineDomTreeNode *Node : Scopes) { MachineBasicBlock *MBB = Node->getBlock(); EnterScope(MBB); @@ -858,13 +844,11 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { if (MI.memoperands_empty()) return true; - for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), - E = MI.memoperands_end(); I != E; ++I) { - if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) { + for (MachineMemOperand *MemOp : MI.memoperands()) + if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) if (PSV->isGOT() || PSV->isConstantPool()) return true; - } - } + return false; } @@ -899,9 +883,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { return false; // The instruction is loop invariant if all of its operands are. 
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = I.getOperand(i); - + for (const MachineOperand &MO : I.operands()) { if (!MO.isReg()) continue; @@ -1230,11 +1212,8 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { /// preheader that may become duplicates of instructions that are hoisted /// out of the loop. void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { - for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { - const MachineInstr *MI = &*I; - unsigned Opcode = MI->getOpcode(); - CSEMap[Opcode].push_back(MI); - } + for (MachineInstr &MI : *BB) + CSEMap[MI.getOpcode()].push_back(&MI); } /// Find an instruction amount PrevMIs that is a duplicate of MI. @@ -1242,11 +1221,10 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { const MachineInstr* MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr*> &PrevMIs) { - for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { - const MachineInstr *PrevMI = PrevMIs[i]; + for (const MachineInstr *PrevMI : PrevMIs) if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr))) return PrevMI; - } + return nullptr; } @@ -1296,8 +1274,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, } } - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Idx = Defs[i]; + for (unsigned Idx : Defs) { unsigned Reg = MI->getOperand(Idx).getReg(); unsigned DupReg = Dup->getOperand(Idx).getReg(); MRI->replaceRegWith(Reg, DupReg); @@ -1370,11 +1347,9 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // Clear the kill flags of any register this instruction defines, // since they may need to be live throughout the entire loop // rather than just live for part of it. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isDef() && !MO.isDead()) MRI->clearKillFlags(MO.getReg()); - } // Add to the CSE map. if (CI != CSEMap.end()) diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index cdcd8eb..428295e 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1736,7 +1736,7 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { // Check the LI only has one connected component. 
ConnectedVNInfoEqClasses ConEQ(*LiveInts); - unsigned NumComp = ConEQ.Classify(&LI); + unsigned NumComp = ConEQ.Classify(LI); if (NumComp > 1) { report("Multiple connected components in live interval", MF); report_context(LI); diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index e7b3217..c1ff13e 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -2874,7 +2874,7 @@ void RegisterCoalescer::joinAllIntervals() { std::vector<MBBPriorityInfo> MBBs; MBBs.reserve(MF->size()); - for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB), JoinSplitEdges && isSplitEdge(MBB))); diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index 3749b1d..f33dc3e 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -313,21 +313,6 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) { namespace { -/// List of register defined and used by a machine instruction. -class RegisterOperands { -public: - SmallVector<unsigned, 8> Uses; - SmallVector<unsigned, 8> Defs; - SmallVector<unsigned, 8> DeadDefs; - - void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, - const MachineRegisterInfo &MRI, bool IgnoreDead = false); - - /// Use liveness information to find dead defs not marked with a dead flag - /// and move them to the DeadDefs vector. - void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); -}; - /// Collect this instruction's unique uses and defs into SmallVectors for /// processing defs and uses in order. /// @@ -385,9 +370,11 @@ class RegisterOperandsCollector { } } - friend class RegisterOperands; + friend class llvm::RegisterOperands; }; +} // namespace + void RegisterOperands::collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, @@ -417,8 +404,6 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI, } } -} // namespace - /// Initialize an array of N PressureDiffs. void PressureDiffs::init(unsigned N) { Size = N; @@ -431,6 +416,18 @@ void PressureDiffs::init(unsigned N) { PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff))); } +void PressureDiffs::addInstruction(unsigned Idx, + const RegisterOperands &RegOpers, + const MachineRegisterInfo &MRI) { + PressureDiff &PDiff = (*this)[Idx]; + assert(!PDiff.begin()->isValid() && "stale PDiff"); + for (unsigned Reg : RegOpers.Defs) + PDiff.addPressureChange(Reg, true, &MRI); + + for (unsigned Reg : RegOpers.Uses) + PDiff.addPressureChange(Reg, false, &MRI); +} + /// Add a change in pressure to the pressure diff of a given instruction. void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, const MachineRegisterInfo *MRI) { @@ -467,18 +464,6 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, } } -/// Record the pressure difference induced by the given operand list. 
-static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers, - const MachineRegisterInfo *MRI) { - assert(!PDiff.begin()->isValid() && "stale PDiff"); - - for (unsigned Reg : RegOpers.Defs) - PDiff.addPressureChange(Reg, true, MRI); - - for (unsigned Reg : RegOpers.Uses) - PDiff.addPressureChange(Reg, false, MRI); -} - /// Force liveness of registers. void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) { for (unsigned Reg : Regs) { @@ -514,39 +499,10 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) { /// registers that are both defined and used by the instruction. If a pressure /// difference pointer is provided record the changes is pressure caused by this /// instruction independent of liveness. -void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, - PressureDiff *PDiff) { - assert(CurrPos != MBB->begin()); - if (!isBottomClosed()) - closeBottom(); - - // Open the top of the region using block iterators. - if (!RequireIntervals && isTopClosed()) - static_cast<RegionPressure&>(P).openTop(CurrPos); - - // Find the previous instruction. - do - --CurrPos; - while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); +void RegPressureTracker::recede(const RegisterOperands &RegOpers, + SmallVectorImpl<unsigned> *LiveUses) { assert(!CurrPos->isDebugValue()); - SlotIndex SlotIdx; - if (RequireIntervals) - SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); - - // Open the top of the region using slot indexes. - if (RequireIntervals && isTopClosed()) - static_cast<IntervalPressure&>(P).openTop(SlotIdx); - - const MachineInstr &MI = *CurrPos; - RegisterOperands RegOpers; - RegOpers.collect(MI, *TRI, *MRI); - if (RequireIntervals) - RegOpers.detectDeadDefs(MI, *LIS); - - if (PDiff) - collectPDiff(*PDiff, RegOpers, MRI); - // Boost pressure for all dead defs together. increaseRegPressure(RegOpers.DeadDefs); decreaseRegPressure(RegOpers.DeadDefs); @@ -560,6 +516,10 @@ void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, discoverLiveOut(Reg); } + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + // Generate liveness for uses. for (unsigned Reg : RegOpers.Uses) { if (!LiveRegs.contains(Reg)) { @@ -586,6 +546,41 @@ void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, } } +void RegPressureTracker::recedeSkipDebugValues() { + assert(CurrPos != MBB->begin()); + if (!isBottomClosed()) + closeBottom(); + + // Open the top of the region using block iterators. + if (!RequireIntervals && isTopClosed()) + static_cast<RegionPressure&>(P).openTop(CurrPos); + + // Find the previous instruction. + do + --CurrPos; + while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + + // Open the top of the region using slot indexes. + if (RequireIntervals && isTopClosed()) + static_cast<IntervalPressure&>(P).openTop(SlotIdx); +} + +void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses) { + recedeSkipDebugValues(); + + const MachineInstr &MI = *CurrPos; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI); + if (RequireIntervals) + RegOpers.detectDeadDefs(MI, *LIS); + + recede(RegOpers, LiveUses); +} + /// Advance across the current instruction. 
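Splitting recede() into recedeSkipDebugValues() plus recede(RegOpers) turns one opaque step into a move phase and an apply phase, so a caller can collect and reuse the instruction's operands in between; the scheduler change below does exactly that. The shape of the protocol, reduced to a toy tracker (an analogy, not the real class):

    #include <cstddef>
    #include <vector>

    struct MiniTracker {
      const std::vector<int> *Effects = nullptr; // per-instruction deltas
      std::size_t Pos = 0; // one past the current instruction, like CurrPos
      int Pressure = 0;

      void recedeSkipDebugValues() { --Pos; }        // phase 1: move cursor
      int currentEffect() const { return (*Effects)[Pos]; } // caller peeks
      void recede(int Delta) { Pressure += Delta; }  // phase 2: apply effects
    };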
void RegPressureTracker::advance() { assert(!TrackUntiedDefs && "unsupported mode"); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index fb82ab7..11b246a 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -896,11 +896,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(SU && "No SUnit mapped to this MI"); if (RPTracker) { - PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr; - RPTracker->recede(/*LiveUses=*/nullptr, PDiff); - assert(RPTracker->getPos() == std::prev(MII) && - "RPTracker can't find MI"); collectVRegUses(SU); + + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, MRI); + if (PDiffs != nullptr) + PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI); + + RPTracker->recedeSkipDebugValues(); + assert(&*RPTracker->getPos() == MI && "RPTracker in sync"); + RPTracker->recede(RegOpers); } assert( @@ -1005,6 +1010,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, TrueMemOrderLatency); } + // This call must come after calls to addChainDependency() since it + // consumes the 'RejectMemNodes' list that addChainDependency() possibly + // adds to. adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); PendingLoads.clear(); @@ -1086,6 +1094,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); } + // This call must come after calls to addChainDependency() since it + // consumes the 'RejectMemNodes' list that addChainDependency() possibly + // adds to. adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); } else if (MI->mayLoad()) { @@ -1133,13 +1144,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, else NonAliasMemUses[V].push_back(SU); } - if (MayAlias) - adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, - RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); + if (MayAlias) + // This call must come after calls to addChainDependency() since it + // consumes the 'RejectMemNodes' list that addChainDependency() + // possibly adds to. 
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, + RejectMemNodes, /*Latency=*/0); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bc2405b9..c741982 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7325,6 +7325,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitcast (fneg x)) -> // flipbit = signbit // (xor (bitcast x) (build_pair flipbit, flipbit)) + // // fold (bitcast (fabs x)) -> // flipbit = (and (extract_element (bitcast x), 0), signbit) // (xor (bitcast x) (build_pair flipbit, flipbit)) @@ -8794,20 +8795,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { ZeroCmp, Zero, RV); } +/// copysign(x, fp_extend(y)) -> copysign(x, y) +/// copysign(x, fp_round(y)) -> copysign(x, y) static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { - // copysign(x, fp_extend(y)) -> copysign(x, y) - // copysign(x, fp_round(y)) -> copysign(x, y) - // Do not optimize out type conversion of f128 type yet. - // For some target like x86_64, configuration is changed - // to keep one f128 value in one SSE register, but - // instruction selection cannot handle FCOPYSIGN on - // SSE registers yet. SDValue N1 = N->getOperand(1); - EVT N1VT = N1->getValueType(0); - EVT N1Op0VT = N1->getOperand(0)->getValueType(0); - return (N1.getOpcode() == ISD::FP_EXTEND || - N1.getOpcode() == ISD::FP_ROUND) && - (N1VT == N1Op0VT || N1Op0VT != MVT::f128); + if ((N1.getOpcode() == ISD::FP_EXTEND || + N1.getOpcode() == ISD::FP_ROUND)) { + // Do not optimize out type conversion of f128 type yet. + // For some targets like x86_64, configuration is changed to keep one f128 + // value in one SSE register, but instruction selection cannot handle + // FCOPYSIGN on SSE registers yet. + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + return (N1VT == N1Op0VT || N1Op0VT != MVT::f128); + } + return false; } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b62bd2b..08815ed 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -297,8 +297,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, else if (Personality == EHPersonality::CoreCLR) calculateClrEHStateNumbers(&fn, EHInfo); - calculateCatchReturnSuccessorColors(&fn, EHInfo); - // Map all BB references in the WinEH data to MBBs. for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { for (WinEHHandlerType &H : TBME.HandlerArray) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f46767f..5d572c4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2941,6 +2941,18 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // This trivially expands to CTLZ. 
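The restructured CanCombineFCOPYSIGN_EXTEND_ROUND expresses the same fold: copysign reads only the sign of its second operand, and rounding or extending never flips a sign, so the conversion can be bypassed (modulo the f128 lowering caveat in the comment). A quick standalone check of that identity:

    #include <cassert>
    #include <cmath>

    int main() {
      const double X = 3.0;
      const double Y = -1.2345678901234567;
      // fp_round(y): narrowing changes the magnitude but never the sign,
      // so copysign cannot observe the difference.
      const float YRounded = static_cast<float>(Y);
      assert(std::copysign(X, Y) == std::copysign(X, double(YRounded)));
      return 0;
    }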
return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { + EVT VT = Op.getValueType(); + unsigned len = VT.getSizeInBits(); + + if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { + EVT SetCCVT = getSetCCResultType(VT); + SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); + return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + DAG.getConstant(len, dl, VT), CTLZ); + } + // for now, we do this: // x = x | (x >> 1); // x = x | (x >> 2); @@ -2950,9 +2962,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // return popcount(~x); // // Ref: "Hacker's Delight" by Henry Warren - EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::OR, dl, VT, Op, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index cd114d6..74f80db 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -262,12 +262,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); case TargetLowering::TypePromoteFloat: { // Convert the promoted float by hand. - if (NOutVT.bitsEq(NInVT)) { - SDValue PromotedOp = GetPromotedFloat(InOp); - SDValue Trunc = DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp); - return DAG.getNode(ISD::AssertZext, dl, NOutVT, Trunc, - DAG.getValueType(OutVT)); - } + SDValue PromotedOp = GetPromotedFloat(InOp); + return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp); break; } case TargetLowering::TypeExpandInteger: diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e446a93..45ae39a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1205,8 +1205,13 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { // Figure out the funclet membership for the catchret's successor. // This will be used by the FuncletLayout pass to determine how to order the // BB's. - WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); - const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I]; + // A 'catchret' returns to the outer scope's color. 
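The new CTLZ expansion above prefers a target's zero-undef count-leading-zeros plus an explicit select over the shift/or/popcount sequence. The same construction in C++, where GCC/Clang's __builtin_clz (undefined for 0) plays the role of CTLZ_ZERO_UNDEF and the ternary is the select on zero:

    #include <cassert>
    #include <cstdint>

    // Fully defined ctlz built from a zero-undef primitive plus a select.
    static unsigned ctlz32(uint32_t X) {
      return X == 0 ? 32u : static_cast<unsigned>(__builtin_clz(X));
    }

    int main() {
      assert(ctlz32(0) == 32);
      assert(ctlz32(1) == 31);
      assert(ctlz32(0x80000000u) == 0);
      return 0;
    }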
+ Value *ParentPad = I.getParentPad(); + const BasicBlock *SuccessorColor; + if (isa<ConstantTokenNone>(ParentPad)) + SuccessorColor = &FuncInfo.Fn->getEntryBlock(); + else + SuccessorColor = cast<Instruction>(ParentPad)->getParent(); assert(SuccessorColor && "No parent funclet for catchret!"); MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 6547a62..02545a7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -461,7 +461,9 @@ static void lowerIncomingStatepointValue(SDValue Incoming, // If the original value was a constant, make sure it gets recorded as // such in the stackmap. This is required so that the consumer can // parse any internal format to the deopt state. It also handles null - // pointers and other constant pointers in GC states + // pointers and other constant pointers in GC states. Note the constant + // vectors do not appear to actually hit this path and that anything larger + // than an i64 value (not type!) will fail asserts here. pushStackMapConstant(Ops, Builder, C->getSExtValue()); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { // This handles allocas as arguments to the statepoint (this is only @@ -505,27 +507,27 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, #ifndef NDEBUG // Check that each of the gc pointer and bases we've gotten out of the - // safepoint is something the strategy thinks might be a pointer into the GC - // heap. This is basically just here to help catch errors during statepoint - // insertion. TODO: This should actually be in the Verifier, but we can't get - // to the GCStrategy from there (yet). + // safepoint is something the strategy thinks might be a pointer (or vector + // of pointers) into the GC heap. This is basically just here to help catch + // errors during statepoint insertion. TODO: This should actually be in the + // Verifier, but we can't get to the GCStrategy from there (yet). GCStrategy &S = Builder.GFI->getStrategy(); for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V->getType()); + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V->getType()); + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed derived pointer found in statepoint"); } } for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V->getType()); + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed pointer relocated"); } diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp index f8aa1e2..d361a6c 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -47,6 +47,7 @@ // MachineFrameInfo is updated with this information. //===----------------------------------------------------------------------===// #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" // To check for profitability. 
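The ShrinkWrap hunks that follow add a reverse-post-order irreducibility test: during an RPO walk, any edge into an already-visited block must be a backedge to a loop header known to MachineLoopInfo, and an edge that is not marks the CFG irreducible. A self-contained sketch of that walk; the backedge oracle is a parameter here, where LLVM asks the loop info:

    #include <functional>
    #include <vector>

    bool isIrreducible(
        const std::vector<std::vector<unsigned>> &Succs,
        const std::vector<unsigned> &RPO, // blocks in reverse post-order
        const std::function<bool(unsigned Src, unsigned Hdr)> &IsBackedge) {
      std::vector<bool> Visited(Succs.size(), false);
      for (unsigned BB : RPO) {
        Visited[BB] = true;
        for (unsigned Succ : Succs[BB]) {
          // An edge to a visited block is retreating; in a reducible CFG
          // it must target the header of a loop containing the source.
          if (Visited[Succ] && !IsBackedge(BB, Succ))
            return true;
        }
      }
      return false;
    }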
@@ -263,6 +264,8 @@ MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, if (!IDom) break; } + if (IDom == &Block) + return nullptr; return IDom; } @@ -352,13 +355,9 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { // Push Save outside of this loop if immediate dominator is different // from save block. If immediate dominator is not different, bail out. - MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT); - if (IDom != Save) - Save = IDom; - else { - Save = nullptr; + Save = FindIDom<>(*Save, Save->predecessors(), *MDT); + if (!Save) break; - } } else { // If the loop does not exit, there is no point in looking // for a post-dominator outside the loop. @@ -386,6 +385,41 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, } } +/// Check whether the edge (\p SrcBB, \p DestBB) is a backedge according to MLI. +/// I.e., check if there exists a loop that contains SrcBB and where DestBB is +/// the loop header. +static bool isProperBackedge(const MachineLoopInfo &MLI, + const MachineBasicBlock *SrcBB, + const MachineBasicBlock *DestBB) { + for (const MachineLoop *Loop = MLI.getLoopFor(SrcBB); Loop; + Loop = Loop->getParentLoop()) { + if (Loop->getHeader() == DestBB) + return true; + } + return false; +} + +/// Check if the CFG of \p MF is irreducible. +static bool isIrreducibleCFG(const MachineFunction &MF, + const MachineLoopInfo &MLI) { + const MachineBasicBlock *Entry = &*MF.begin(); + ReversePostOrderTraversal<const MachineBasicBlock *> RPOT(Entry); + BitVector VisitedBB(MF.getNumBlockIDs()); + for (const MachineBasicBlock *MBB : RPOT) { + VisitedBB.set(MBB->getNumber()); + for (const MachineBasicBlock *SuccBB : MBB->successors()) { + if (!VisitedBB.test(SuccBB->getNumber())) + continue; + // We already visited SuccBB, thus MBB->SuccBB must be a backedge. + // Check that the header matches what we have in the loop information. + // Otherwise, we have an irreducible graph. + if (!isProperBackedge(MLI, MBB, SuccBB)) + return true; + } + } + return false; +} + bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { if (MF.empty() || !isShrinkWrapEnabled(MF)) return false; @@ -394,6 +428,17 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { init(MF); + if (isIrreducibleCFG(MF, *MLI)) { + // If MF is irreducible, a block may be in a loop without + // MachineLoopInfo reporting it. I.e., we may use the + // post-dominance property in loops, which leads to incorrect + // results. Moreover, we may miss that the prologue and + // epilogue are not in the same loop, leading to unbalanced + // construction/destruction of the stack frame. + DEBUG(dbgs() << "Irreducible CFGs are not supported yet\n"); + return false; + } + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); std::unique_ptr<RegScavenger> RS( TRI->requiresRegisterScavenging(MF) ?
new RegScavenger() : nullptr); diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index 3541b33..7b52038 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -570,6 +571,14 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { } } + // Update the location of C++ catch objects for the MSVC personality routine. + if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo()) + for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap) + for (WinEHHandlerType &H : TBME.HandlerArray) + if (H.CatchObj.FrameIndex != INT_MAX && + SlotRemap.count(H.CatchObj.FrameIndex)) + H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex]; + DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n"); DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n"); DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n"); diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp index 2426c27..886c5f6 100644 --- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -144,10 +144,11 @@ static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow, HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts()); HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue(); HT.Handler = CPI->getParent(); - if (isa<ConstantPointerNull>(CPI->getArgOperand(2))) - HT.CatchObj.Alloca = nullptr; + if (auto *AI = + dyn_cast<AllocaInst>(CPI->getArgOperand(2)->stripPointerCasts())) + HT.CatchObj.Alloca = AI; else - HT.CatchObj.Alloca = cast<AllocaInst>(CPI->getArgOperand(2)); + HT.CatchObj.Alloca = nullptr; TBME.HandlerArray.push_back(HT); } FuncInfo.TryBlockMap.push_back(TBME); @@ -664,24 +665,6 @@ void WinEHPrepare::colorFunclets(Function &F) { } } -void llvm::calculateCatchReturnSuccessorColors(const Function *Fn, - WinEHFuncInfo &FuncInfo) { - for (const BasicBlock &BB : *Fn) { - const auto *CatchRet = dyn_cast<CatchReturnInst>(BB.getTerminator()); - if (!CatchRet) - continue; - // A 'catchret' returns to the outer scope's color. - Value *ParentPad = CatchRet->getParentPad(); - const BasicBlock *Color; - if (isa<ConstantTokenNone>(ParentPad)) - Color = &Fn->getEntryBlock(); - else - Color = cast<Instruction>(ParentPad)->getParent(); - // Record the catchret successor's funclet membership. - FuncInfo.CatchRetSuccessorColorMap[CatchRet] = Color; - } -} - void WinEHPrepare::demotePHIsOnFunclets(Function &F) { // Strip PHI nodes off of EH pads. 
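The isIrreducibleCFG/isProperBackedge pair in the ShrinkWrap hunk above leans on a property of reverse post-order: the only edges into an already-visited block are retreating edges, and in a reducible CFG every retreating edge must target a loop header the loop analysis knows about. A standalone sketch of the same test on a toy adjacency list; every name here is ours, not LLVM's:

    #include <algorithm>
    #include <functional>
    #include <vector>

    // Succ[N] lists the successors of node N; Entry is the CFG entry node.
    // isProperBackedge(Src, Dest) stands in for the MachineLoopInfo query: it
    // must return true iff Dest is the header of a loop containing Src.
    template <typename BackedgeFn>
    bool isIrreducible(const std::vector<std::vector<int>> &Succ, int Entry,
                       BackedgeFn isProperBackedge) {
      const int N = static_cast<int>(Succ.size());
      // Reverse post-order = the reverse of a DFS post-order from the entry.
      std::vector<int> Order;
      std::vector<char> Seen(N, 0);
      std::function<void(int)> PostOrder = [&](int U) {
        Seen[U] = 1;
        for (int V : Succ[U])
          if (!Seen[V])
            PostOrder(V);
        Order.push_back(U);
      };
      PostOrder(Entry);
      std::reverse(Order.begin(), Order.end());

      std::vector<char> Visited(N, 0);
      for (int U : Order) {
        Visited[U] = 1;
        for (int V : Succ[U])
          // An already-visited successor means U->V retreats; if the loop
          // analysis does not bless it as a backedge, the CFG is irreducible.
          if (Visited[V] && !isProperBackedge(U, V))
            return true;
      }
      return false;
    }

On the classic two-entry loop (edges 0->1, 0->2, 1->2, 2->1) this returns true for any analysis that recognizes only natural loops, which is exactly the case ShrinkWrap now bails out on.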
SmallVector<PHINode *, 16> PHINodes; diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index c6bfbc0..a9dee7a 100644 --- a/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -15,6 +15,7 @@ #include "llvm/DebugInfo/Symbolize/DIPrinter.h" #include "llvm/DebugInfo/DIContext.h" +#include "llvm/Support/LineIterator.h" namespace llvm { namespace symbolize { @@ -24,7 +25,37 @@ namespace symbolize { static const char kDILineInfoBadString[] = "<invalid>"; static const char kBadString[] = "??"; -void DIPrinter::printName(const DILineInfo &Info, bool Inlined) { +// Prints the source code in FileName around the given Line. +void DIPrinter::printContext(std::string FileName, int64_t Line) { + if (PrintSourceContext <= 0) + return; + + ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = + MemoryBuffer::getFile(FileName); + if (!BufOrErr) + return; + + std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get()); + int64_t FirstLine = + std::max(static_cast<int64_t>(1), Line - PrintSourceContext / 2); + int64_t LastLine = FirstLine + PrintSourceContext; + size_t MaxLineNumberWidth = std::ceil(std::log10(LastLine)); + + for (line_iterator I = line_iterator(*Buf, false); + !I.is_at_eof() && I.line_number() <= LastLine; ++I) { + int64_t L = I.line_number(); + if (L >= FirstLine && L <= LastLine) { + OS << format_decimal(L, MaxLineNumberWidth); + if (L == Line) + OS << " >: "; + else + OS << " : "; + OS << *I << "\n"; + } + } +} + +void DIPrinter::print(const DILineInfo &Info, bool Inlined) { if (PrintFunctionNames) { std::string FunctionName = Info.FunctionName; if (FunctionName == kDILineInfoBadString) @@ -38,21 +69,22 @@ void DIPrinter::printName(const DILineInfo &Info, bool Inlined) { if (Filename == kDILineInfoBadString) Filename = kBadString; OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n"; + printContext(Filename, Info.Line); } DIPrinter &DIPrinter::operator<<(const DILineInfo &Info) { - printName(Info, false); + print(Info, false); return *this; } DIPrinter &DIPrinter::operator<<(const DIInliningInfo &Info) { uint32_t FramesNum = Info.getNumberOfFrames(); if (FramesNum == 0) { - printName(DILineInfo(), false); + print(DILineInfo(), false); return *this; } for (uint32_t i = 0; i < FramesNum; i++) - printName(Info.getFrame(i), i > 0); + print(Info.getFrame(i), i > 0); return *this; } diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp index b931f10..01e829f 100644 --- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp @@ -1,4 +1,4 @@ -//===------- OrcTargetSupport.cpp - Target support utilities for Orc ------===// +//===------ OrcArchSupport.cpp - Architecture specific support code -------===// // // The LLVM Compiler Infrastructure // @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Triple.h" -#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h" #include "llvm/Support/Process.h" #include <array> diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp index e519c7f..956daae 100644 --- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp +++
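DIPrinter::printContext above reads the whole file through a MemoryBuffer and walks it with line_iterator; the same windowing logic can be sketched with plain iostreams. All names here are ours, and the window and marker format copy the hunk (the width is computed from the rendered last line number, which also sidesteps the log10 edge case at exact powers of ten):

    #include <algorithm>
    #include <fstream>
    #include <iomanip>
    #include <iostream>
    #include <string>

    // Print `Context` lines of source centered on `Line`, marking the target
    // line with '>', as DIPrinter::printContext does above.
    static void printSourceContext(const std::string &FileName, long Line,
                                   int Context) {
      if (Context <= 0)
        return;
      std::ifstream In(FileName);
      if (!In)
        return; // mirror the silent bail-out when the file cannot be read
      long First = std::max(1L, Line - Context / 2);
      long Last = First + Context;
      int Width = (int)std::to_string(Last).size(); // widest line number
      std::string Text;
      for (long L = 1; L <= Last && std::getline(In, Text); ++L) {
        if (L < First)
          continue;
        std::cout << std::setw(Width) << L << (L == Line ? " >: " : "  : ")
                  << Text << "\n";
      }
    }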
b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp @@ -9,7 +9,7 @@ #include "OrcCBindingsStack.h" -#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" #include <cstdio> diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index 2e17624..aae6a99 100644 --- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -221,7 +221,8 @@ public: ModuleHandleT addIRModuleLazy(Module* M, LLVMOrcSymbolResolverFn ExternalResolver, void *ExternalResolverCtx) { - return addIRModule(CODLayer, std::move(M), nullptr, + return addIRModule(CODLayer, std::move(M), + llvm::make_unique<SectionMemoryManager>(), std::move(ExternalResolver), ExternalResolverCtx); } diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp new file mode 100644 index 0000000..e95115e --- /dev/null +++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp @@ -0,0 +1,57 @@ +//===---------------- OrcError.cpp - Error codes for ORC ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Error codes for ORC. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/OrcError.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; +using namespace llvm::orc; + +namespace { + +class OrcErrorCategory : public std::error_category { +public: + const char *name() const LLVM_NOEXCEPT override { return "orc"; } + + std::string message(int condition) const override { + switch (static_cast<OrcErrorCode>(condition)) { + case OrcErrorCode::RemoteAllocatorDoesNotExist: + return "Remote allocator does not exist"; + case OrcErrorCode::RemoteAllocatorIdAlreadyInUse: + return "Remote allocator Id already in use"; + case OrcErrorCode::RemoteMProtectAddrUnrecognized: + return "Remote mprotect call references unallocated memory"; + case OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist: + return "Remote indirect stubs owner does not exist"; + case OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse: + return "Remote indirect stubs owner Id already in use"; + case OrcErrorCode::UnexpectedRPCCall: + return "Unexpected RPC call"; + } + llvm_unreachable("Unhandled error code"); + } +}; + +static ManagedStatic<OrcErrorCategory> OrcErrCat; +} + +namespace llvm { +namespace orc { + +std::error_code orcError(OrcErrorCode ErrCode) { + typedef std::underlying_type<OrcErrorCode>::type UT; + return std::error_code(static_cast<UT>(ErrCode), *OrcErrCat); +} +} +} diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index 38a27cf..2ab70a9 100644 --- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -54,10 +54,13 @@ class OrcMCJITReplacement : public ExecutionEngine { return Addr; } - void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO, - uintptr_t DataSizeRW) override { - return ClientMM->reserveAllocationSpace(CodeSize, 
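OrcError.cpp above follows the standard recipe for a custom std::error_code domain: a singleton std::error_category supplies the domain name and the human-readable messages, and a small helper wraps enum values into std::error_code. A self-contained sketch of the same recipe, with DemoErrorCode and demoError as our stand-ins (a function-local static plays the role of ManagedStatic):

    #include <string>
    #include <system_error>

    enum class DemoErrorCode { UnexpectedRPCCall = 1, AllocatorIdInUse };

    // One category object per error domain; name() and message() are the only
    // members that must be overridden.
    class DemoErrorCategory : public std::error_category {
    public:
      const char *name() const noexcept override { return "demo"; }
      std::string message(int Condition) const override {
        switch (static_cast<DemoErrorCode>(Condition)) {
        case DemoErrorCode::UnexpectedRPCCall: return "Unexpected RPC call";
        case DemoErrorCode::AllocatorIdInUse: return "Allocator Id already in use";
        }
        return "Unknown demo error";
      }
    };

    // Analogue of orcError(): wrap an enum value in the shared category.
    std::error_code demoError(DemoErrorCode Code) {
      static DemoErrorCategory Category; // ManagedStatic stand-in
      return std::error_code(static_cast<int>(Code), Category);
    }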
DataSizeRO, - DataSizeRW); + void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + uintptr_t RODataSize, uint32_t RODataAlign, + uintptr_t RWDataSize, + uint32_t RWDataAlign) override { + return ClientMM->reserveAllocationSpace(CodeSize, CodeAlign, + RODataSize, RODataAlign, + RWDataSize, RWDataAlign); } bool needsToReserveAllocationSpace() override { @@ -74,6 +77,11 @@ class OrcMCJITReplacement : public ExecutionEngine { return ClientMM->deregisterEHFrames(Addr, LoadAddr, Size); } + void notifyObjectLoaded(RuntimeDyld &RTDyld, + const object::ObjectFile &O) override { + return ClientMM->notifyObjectLoaded(RTDyld, O); + } + void notifyObjectLoaded(ExecutionEngine *EE, const object::ObjectFile &O) override { return ClientMM->notifyObjectLoaded(EE, O); diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp new file mode 100644 index 0000000..064633b --- /dev/null +++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp @@ -0,0 +1,83 @@ +//===------- OrcRemoteTargetRPCAPI.cpp - ORC Remote API utilities ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" + +namespace llvm { +namespace orc { +namespace remote { + +const char *OrcRemoteTargetRPCAPI::getJITProcIdName(JITProcId Id) { + switch (Id) { + case InvalidId: + return "*** Invalid JITProcId ***"; + case CallIntVoidId: + return "CallIntVoid"; + case CallIntVoidResponseId: + return "CallIntVoidResponse"; + case CallMainId: + return "CallMain"; + case CallMainResponseId: + return "CallMainResponse"; + case CallVoidVoidId: + return "CallVoidVoid"; + case CallVoidVoidResponseId: + return "CallVoidVoidResponse"; + case CreateRemoteAllocatorId: + return "CreateRemoteAllocator"; + case CreateIndirectStubsOwnerId: + return "CreateIndirectStubsOwner"; + case DestroyRemoteAllocatorId: + return "DestroyRemoteAllocator"; + case DestroyIndirectStubsOwnerId: + return "DestroyIndirectStubsOwner"; + case EmitIndirectStubsId: + return "EmitIndirectStubs"; + case EmitIndirectStubsResponseId: + return "EmitIndirectStubsResponse"; + case EmitResolverBlockId: + return "EmitResolverBlock"; + case EmitTrampolineBlockId: + return "EmitTrampolineBlock"; + case EmitTrampolineBlockResponseId: + return "EmitTrampolineBlockResponse"; + case GetSymbolAddressId: + return "GetSymbolAddress"; + case GetSymbolAddressResponseId: + return "GetSymbolAddressResponse"; + case GetRemoteInfoId: + return "GetRemoteInfo"; + case GetRemoteInfoResponseId: + return "GetRemoteInfoResponse"; + case ReadMemId: + return "ReadMem"; + case ReadMemResponseId: + return "ReadMemResponse"; + case ReserveMemId: + return "ReserveMem"; + case ReserveMemResponseId: + return "ReserveMemResponse"; + case RequestCompileId: + return "RequestCompile"; + case RequestCompileResponseId: + return "RequestCompileResponse"; + case SetProtectionsId: + return "SetProtections"; + case TerminateSessionId: + return "TerminateSession"; + case WriteMemId: + return "WriteMem"; + case WritePtrId: + return "WritePtr"; + }; + return nullptr; +} +} +} +} diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index a95f3bb..d16b2db 100644 --- 
a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -146,9 +146,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { // Compute the memory size required to load all sections to be loaded // and pass this information to the memory manager if (MemMgr.needsToReserveAllocationSpace()) { - uint64_t CodeSize = 0, DataSizeRO = 0, DataSizeRW = 0; - computeTotalAllocSize(Obj, CodeSize, DataSizeRO, DataSizeRW); - MemMgr.reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW); + uint64_t CodeSize = 0, RODataSize = 0, RWDataSize = 0; + uint32_t CodeAlign = 1, RODataAlign = 1, RWDataAlign = 1; + computeTotalAllocSize(Obj, CodeSize, CodeAlign, RODataSize, RODataAlign, + RWDataSize, RWDataAlign); + MemMgr.reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, + RWDataSize, RWDataAlign); } // Used sections from the object file @@ -335,13 +338,15 @@ static bool isZeroInit(const SectionRef Section) { // sections void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, uint64_t &CodeSize, - uint64_t &DataSizeRO, - uint64_t &DataSizeRW) { + uint32_t &CodeAlign, + uint64_t &RODataSize, + uint32_t &RODataAlign, + uint64_t &RWDataSize, + uint32_t &RWDataAlign) { // Compute the size of all sections required for execution std::vector<uint64_t> CodeSectionSizes; std::vector<uint64_t> ROSectionSizes; std::vector<uint64_t> RWSectionSizes; - uint64_t MaxAlignment = sizeof(void *); // Collect sizes of all sections to be loaded; // also determine the max alignment of all sections @@ -376,17 +381,15 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, SectionSize = 1; if (IsCode) { + CodeAlign = std::max(CodeAlign, Alignment); CodeSectionSizes.push_back(SectionSize); } else if (IsReadOnly) { + RODataAlign = std::max(RODataAlign, Alignment); ROSectionSizes.push_back(SectionSize); } else { + RWDataAlign = std::max(RWDataAlign, Alignment); RWSectionSizes.push_back(SectionSize); } - - // update the max alignment - if (Alignment > MaxAlignment) { - MaxAlignment = Alignment; - } } } @@ -410,9 +413,9 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, // allocated with the max alignment. Note that we cannot compute with the // individual alignments of the sections, because then the required size // depends on the order, in which the sections are allocated. 
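The comment above is the key to the new per-group alignment: padding every section in a group to that group's maximum alignment is an upper bound that cannot be undershot no matter which order the memory manager ultimately allocates the sections in. A sketch of the per-group computation, a plausible analogue of computeAllocationSizeForSections rather than the verbatim helper:

    #include <cstdint>
    #include <vector>

    // Upper bound for one group (code, RO data, or RW data): each section is
    // rounded up to the group's max alignment, so any allocation order fits.
    // The div-mul form is safe even for alignments that are not powers of two.
    static uint64_t groupAllocationSize(const std::vector<uint64_t> &SectionSizes,
                                        uint64_t MaxAlign) {
      uint64_t Total = 0;
      for (uint64_t Size : SectionSizes)
        Total += (Size + MaxAlign - 1) / MaxAlign * MaxAlign; // round up
      return Total;
    }

Splitting MaxAlignment into CodeAlign/RODataAlign/RWDataAlign, as the hunk does, only tightens this bound: each group now pads to its own maximum instead of the global one.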
- CodeSize = computeAllocationSizeForSections(CodeSectionSizes, MaxAlignment); - DataSizeRO = computeAllocationSizeForSections(ROSectionSizes, MaxAlignment); - DataSizeRW = computeAllocationSizeForSections(RWSectionSizes, MaxAlignment); + CodeSize = computeAllocationSizeForSections(CodeSectionSizes, CodeAlign); + RODataSize = computeAllocationSizeForSections(ROSectionSizes, RODataAlign); + RWDataSize = computeAllocationSizeForSections(RWSectionSizes, RWDataAlign); } // compute stub buffer size for the given section @@ -937,7 +940,9 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) { if (!Dyld->isCompatibleFile(Obj)) report_fatal_error("Incompatible object format!"); - return Dyld->loadObject(Obj); + auto LoadedObjInfo = Dyld->loadObject(Obj); + MemMgr.notifyObjectLoaded(*this, Obj); + return LoadedObjInfo; } void *RuntimeDyld::getSymbolLocalAddress(StringRef Name) const { @@ -967,6 +972,17 @@ bool RuntimeDyld::hasError() { return Dyld->hasError(); } StringRef RuntimeDyld::getErrorString() { return Dyld->getErrorString(); } +void RuntimeDyld::finalizeWithMemoryManagerLocking() { + bool MemoryFinalizationLocked = MemMgr.FinalizationLocked; + MemMgr.FinalizationLocked = true; + resolveRelocations(); + registerEHFrames(); + if (!MemoryFinalizationLocked) { + MemMgr.finalizeMemory(); + MemMgr.FinalizationLocked = false; + } +} + void RuntimeDyld::registerEHFrames() { if (Dyld) Dyld->registerEHFrames(); diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index dafd3c8..ab732c6 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -411,8 +411,10 @@ protected: // \brief Compute an upper bound of the memory that is required to load all // sections - void computeTotalAllocSize(const ObjectFile &Obj, uint64_t &CodeSize, - uint64_t &DataSizeRO, uint64_t &DataSizeRW); + void computeTotalAllocSize(const ObjectFile &Obj, + uint64_t &CodeSize, uint32_t &CodeAlign, + uint64_t &RODataSize, uint32_t &RODataAlign, + uint64_t &RWDataSize, uint32_t &RWDataAlign); // \brief Compute the stub buffer size required for a section unsigned computeSectionStubBufSize(const ObjectFile &Obj, diff --git a/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp index e2f2208..1ad5f17 100644 --- a/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp +++ b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp @@ -137,9 +137,6 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) return true; } - // Don't allow free memory blocks to be used after setting protection flags. - RODataMem.FreeMem.clear(); - // Make read-only data memory read-only. 
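finalizeWithMemoryManagerLocking above exists so that a client driving several RuntimeDyld instances over one memory manager finalizes memory only once: the flag is latched before relocations are resolved, and only the caller that set it actually finalizes and releases it. A reduced model of the latch; the types and callbacks here are ours, mirroring the hunk:

    #include <functional>

    // Minimal stand-in for the memory-manager state used by the hunk above.
    struct MemMgr {
      bool FinalizationLocked = false;
      std::function<void()> finalizeMemory;
    };

    // Resolve and register first, then finalize only if nobody further up the
    // call chain already holds the lock; that first caller releases it.
    inline void finalizeWithLocking(MemMgr &MM, std::function<void()> Resolve,
                                    std::function<void()> RegisterEHFrames) {
      bool WasLocked = MM.FinalizationLocked;
      MM.FinalizationLocked = true;
      Resolve();
      RegisterEHFrames();
      if (!WasLocked) {
        MM.finalizeMemory();
        MM.FinalizationLocked = false;
      }
    }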
ec = applyMemoryGroupPermissions(RODataMem, sys::Memory::MF_READ | sys::Memory::MF_EXEC); diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index 1ebe9b7..0ce44e1 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -3121,7 +3121,7 @@ void AssemblyWriter::printMetadataAttachments( return; if (MDNames.empty()) - TheModule->getMDKindNames(MDNames); + MDs[0].second->getContext().getMDKindNames(MDNames); for (const auto &I : MDs) { unsigned Kind = I.first; diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp index 7f39c80..591dafa 100644 --- a/contrib/llvm/lib/IR/Core.cpp +++ b/contrib/llvm/lib/IR/Core.cpp @@ -1722,7 +1722,7 @@ void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) { const char *LLVMGetGC(LLVMValueRef Fn) { Function *F = unwrap<Function>(Fn); - return F->hasGC()? F->getGC() : nullptr; + return F->hasGC()? F->getGC().c_str() : nullptr; } void LLVMSetGC(LLVMValueRef Fn, const char *GC) { diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp index cfb40b1..cfdfc40 100644 --- a/contrib/llvm/lib/IR/Function.cpp +++ b/contrib/llvm/lib/IR/Function.cpp @@ -366,47 +366,21 @@ void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { setAttributes(PAL); } -// Maintain the GC name for each function in an on-the-side table. This saves -// allocating an additional word in Function for programs which do not use GC -// (i.e., most programs) at the cost of increased overhead for clients which do -// use GC. -static DenseMap<const Function*,PooledStringPtr> *GCNames; -static StringPool *GCNamePool; -static ManagedStatic<sys::SmartRWMutex<true> > GCLock; - -bool Function::hasGC() const { - sys::SmartScopedReader<true> Reader(*GCLock); - return GCNames && GCNames->count(this); -} - -const char *Function::getGC() const { +const std::string &Function::getGC() const { assert(hasGC() && "Function has no collector"); - sys::SmartScopedReader<true> Reader(*GCLock); - return *(*GCNames)[this]; + return getContext().getGC(*this); } -void Function::setGC(const char *Str) { - sys::SmartScopedWriter<true> Writer(*GCLock); - if (!GCNamePool) - GCNamePool = new StringPool(); - if (!GCNames) - GCNames = new DenseMap<const Function*,PooledStringPtr>(); - (*GCNames)[this] = GCNamePool->intern(Str); +void Function::setGC(const std::string Str) { + setValueSubclassDataBit(14, !Str.empty()); + getContext().setGC(*this, std::move(Str)); } void Function::clearGC() { - sys::SmartScopedWriter<true> Writer(*GCLock); - if (GCNames) { - GCNames->erase(this); - if (GCNames->empty()) { - delete GCNames; - GCNames = nullptr; - if (GCNamePool->empty()) { - delete GCNamePool; - GCNamePool = nullptr; - } - } - } + if (!hasGC()) + return; + getContext().deleteGC(*this); + setValueSubclassDataBit(14, false); } /// Copy all additional attributes (those not needed to create a Function) from diff --git a/contrib/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm/lib/IR/IRPrintingPasses.cpp index c1ac336..822dbeb 100644 --- a/contrib/llvm/lib/IR/IRPrintingPasses.cpp +++ b/contrib/llvm/lib/IR/IRPrintingPasses.cpp @@ -28,7 +28,13 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner, PreservedAnalyses PrintModulePass::run(Module &M) { OS << Banner; - M.print(OS, nullptr, ShouldPreserveUseListOrder); + if (llvm::isFunctionInPrintList("*")) + M.print(OS, nullptr, ShouldPreserveUseListOrder); + else { + for(const auto &F : M.functions()) + if 
(llvm::isFunctionInPrintList(F.getName())) + F.print(OS); + } return PreservedAnalyses::all(); } @@ -37,7 +43,8 @@ PrintFunctionPass::PrintFunctionPass(raw_ostream &OS, const std::string &Banner) : OS(OS), Banner(Banner) {} PreservedAnalyses PrintFunctionPass::run(Function &F) { - OS << Banner << static_cast<Value &>(F); + if (isFunctionInPrintList(F.getName())) + OS << Banner << static_cast<Value &>(F); return PreservedAnalyses::all(); } diff --git a/contrib/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm/lib/IR/LLVMContext.cpp index 8848bcb..48b53b0 100644 --- a/contrib/llvm/lib/IR/LLVMContext.cpp +++ b/contrib/llvm/lib/IR/LLVMContext.cpp @@ -304,3 +304,19 @@ void LLVMContext::getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const { uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const { return pImpl->getOperandBundleTagID(Tag); } + +void LLVMContext::setGC(const Function &Fn, std::string GCName) { + auto It = pImpl->GCNames.find(&Fn); + + if (It == pImpl->GCNames.end()) { + pImpl->GCNames.insert(std::make_pair(&Fn, std::move(GCName))); + return; + } + It->second = std::move(GCName); +} +const std::string &LLVMContext::getGC(const Function &Fn) { + return pImpl->GCNames[&Fn]; +} +void LLVMContext::deleteGC(const Function &Fn) { + pImpl->GCNames.erase(&Fn); +} diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h index a24114d..d42047d 100644 --- a/contrib/llvm/lib/IR/LLVMContextImpl.h +++ b/contrib/llvm/lib/IR/LLVMContextImpl.h @@ -1027,6 +1027,13 @@ public: void getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const; uint32_t getOperandBundleTagID(StringRef Tag) const; + /// Maintain the GC name for each function. + /// + /// This saves allocating an additional word in Function for programs which + /// do not use GC (i.e., most programs) at the cost of increased overhead for + /// clients which do use GC. + DenseMap<const Function*, std::string> GCNames; + LLVMContextImpl(LLVMContext &C); ~LLVMContextImpl(); diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp index f2e0c7d..63d89f2 100644 --- a/contrib/llvm/lib/IR/LegacyPassManager.cpp +++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <map> +#include <unordered_set> using namespace llvm; using namespace llvm::legacy; @@ -83,6 +84,13 @@ PrintAfterAll("print-after-all", llvm::cl::desc("Print IR after each pass"), cl::init(false)); +static cl::list<std::string> + PrintFuncsList("filter-print-funcs", cl::value_desc("function names"), + cl::desc("Only print IR for functions whose name " + "match this for all print-[before|after][-all] " + "options"), + cl::CommaSeparated); + /// This is a helper to determine whether to print IR before or /// after a pass. @@ -109,6 +117,11 @@ static bool ShouldPrintAfterPass(const PassInfo *PI) { return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter); } +bool llvm::isFunctionInPrintList(StringRef FunctionName) { + static std::unordered_set<std::string> PrintFuncNames(PrintFuncsList.begin(), + PrintFuncsList.end()); + return PrintFuncNames.empty() || PrintFuncNames.count(FunctionName); +} /// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions /// or higher is specified. 
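isFunctionInPrintList above has two properties worth noting: an empty filter matches everything, and the set is built once from the -filter-print-funcs list and then reused for every query. A standalone sketch of the same behavior; the comma splitting that cl::CommaSeparated performs is inlined here, and all names are ours:

    #include <sstream>
    #include <string>
    #include <unordered_set>

    // An empty filter prints every function; otherwise only the named ones.
    static bool inPrintList(const std::string &Name, const std::string &Filter) {
      // Built once on first use and latched, like the function-local static
      // set constructed from PrintFuncsList in the hunk above.
      static std::unordered_set<std::string> Names = [&] {
        std::unordered_set<std::string> S;
        std::stringstream SS(Filter);
        for (std::string Item; std::getline(SS, Item, ',');)
          if (!Item.empty())
            S.insert(Item);
        return S;
      }();
      return Names.empty() || Names.count(Name) != 0;
    }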
bool PMDataManager::isPassDebuggingExecutionsOrMore() const { diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp index d8eaceb..9a9a501 100644 --- a/contrib/llvm/lib/IR/Metadata.cpp +++ b/contrib/llvm/lib/IR/Metadata.cpp @@ -557,7 +557,7 @@ void MDNode::decrementUnresolvedOperandCount() { resolve(); } -void MDNode::resolveCycles(bool AllowTemps) { +void MDNode::resolveRecursivelyImpl(bool AllowTemps) { if (isResolved()) return; diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp index 6dfb05d..9198b0e 100644 --- a/contrib/llvm/lib/IR/Verifier.cpp +++ b/contrib/llvm/lib/IR/Verifier.cpp @@ -45,6 +45,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Verifier.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -145,6 +146,11 @@ private: OS << *C; } + template <typename T> void Write(ArrayRef<T> Vs) { + for (const T &V : Vs) + Write(V); + } + template <typename T1, typename... Ts> void WriteTs(const T1 &V1, const Ts &... Vs) { Write(V1); @@ -204,6 +210,10 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport { /// given function and the largest index passed to llvm.localrecover. DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo; + // Maps catchswitches and cleanuppads that unwind to siblings to the + // terminators that indicate the unwind, used to detect cycles therein. + MapVector<Instruction *, TerminatorInst *> SiblingFuncletInfo; + /// Cache of constants visited in search of ConstantExprs. SmallPtrSet<const Constant *, 32> ConstantExprVisited; @@ -245,9 +255,11 @@ public: Broken = false; // FIXME: We strip const here because the inst visitor strips const. visit(const_cast<Function &>(F)); + verifySiblingFuncletUnwinds(); InstsInThisBlock.clear(); LandingPadResultTy = nullptr; SawFrameEscape = false; + SiblingFuncletInfo.clear(); return !Broken; } @@ -403,6 +415,7 @@ private: void visitCatchPadInst(CatchPadInst &CPI); void visitCatchReturnInst(CatchReturnInst &CatchReturn); void visitCleanupPadInst(CleanupPadInst &CPI); + void visitFuncletPadInst(FuncletPadInst &FPI); void visitCatchSwitchInst(CatchSwitchInst &CatchSwitch); void visitCleanupReturnInst(CleanupReturnInst &CRI); @@ -428,6 +441,7 @@ private: void visitConstantExpr(const ConstantExpr *CE); void VerifyStatepoint(ImmutableCallSite CS); void verifyFrameRecoverIndices(); + void verifySiblingFuncletUnwinds(); // Module-level debug info verification... 
void verifyTypeRefs(); @@ -984,6 +998,9 @@ void Verifier::visitDIMacro(const DIMacro &N) { N.getMacinfoType() == dwarf::DW_MACINFO_undef, "invalid macinfo type", &N); Assert(!N.getName().empty(), "anonymous macro", &N); + if (!N.getValue().empty()) { + assert(N.getValue().data()[0] != ' ' && "Macro value has a space prefix"); + } } void Verifier::visitDIMacroFile(const DIMacroFile &N) { @@ -1693,6 +1710,59 @@ void Verifier::verifyFrameRecoverIndices() { } } +static Instruction *getSuccPad(TerminatorInst *Terminator) { + BasicBlock *UnwindDest; + if (auto *II = dyn_cast<InvokeInst>(Terminator)) + UnwindDest = II->getUnwindDest(); + else if (auto *CSI = dyn_cast<CatchSwitchInst>(Terminator)) + UnwindDest = CSI->getUnwindDest(); + else + UnwindDest = cast<CleanupReturnInst>(Terminator)->getUnwindDest(); + return UnwindDest->getFirstNonPHI(); +} + +void Verifier::verifySiblingFuncletUnwinds() { + SmallPtrSet<Instruction *, 8> Visited; + SmallPtrSet<Instruction *, 8> Active; + for (const auto &Pair : SiblingFuncletInfo) { + Instruction *PredPad = Pair.first; + if (Visited.count(PredPad)) + continue; + Active.insert(PredPad); + TerminatorInst *Terminator = Pair.second; + do { + Instruction *SuccPad = getSuccPad(Terminator); + if (Active.count(SuccPad)) { + // Found a cycle; report error + Instruction *CyclePad = SuccPad; + SmallVector<Instruction *, 8> CycleNodes; + do { + CycleNodes.push_back(CyclePad); + TerminatorInst *CycleTerminator = SiblingFuncletInfo[CyclePad]; + if (CycleTerminator != CyclePad) + CycleNodes.push_back(CycleTerminator); + CyclePad = getSuccPad(CycleTerminator); + } while (CyclePad != SuccPad); + Assert(false, "EH pads can't handle each other's exceptions", + ArrayRef<Instruction *>(CycleNodes)); + } + // Don't re-walk a node we've already checked + if (!Visited.insert(SuccPad).second) + break; + // Walk to this successor if it has a map entry. + PredPad = SuccPad; + auto TermI = SiblingFuncletInfo.find(PredPad); + if (TermI == SiblingFuncletInfo.end()) + break; + Terminator = TermI->second; + Active.insert(PredPad); + } while (true); + // Each node only has one successor, so we've walked all the active + // nodes' successors. + Active.clear(); + } +} + // visitFunction - Verify that a function is ok. // void Verifier::visitFunction(const Function &F) { @@ -2892,6 +2962,13 @@ void Verifier::visitInsertValueInst(InsertValueInst &IVI) { visitInstruction(IVI); } +static Value *getParentPad(Value *EHPad) { + if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) + return FPI->getParentPad(); + + return cast<CatchSwitchInst>(EHPad)->getParentPad(); +} + void Verifier::visitEHPadPredecessors(Instruction &I) { assert(I.isEHPad()); @@ -2919,16 +2996,45 @@ void Verifier::visitEHPadPredecessors(Instruction &I) { "Block containing CatchPadInst must be jumped to " "only by its catchswitch.", CPI); + Assert(BB != CPI->getCatchSwitch()->getUnwindDest(), + "Catchswitch cannot unwind to one of its catchpads", + CPI->getCatchSwitch(), CPI); return; } + // Verify that each pred has a legal terminator with a legal to/from EH + // pad relationship.
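Because every pad in SiblingFuncletInfo has exactly one unwind successor, the cycle check in verifySiblingFuncletUnwinds above is just successor-chain chasing: an Active set holds the current chain, and a Visited set keeps any node from being walked twice, so the whole pass is linear. A standalone sketch on integer pad ids (a std::map stands in for the MapVector; all names ours), returning one cycle or an empty vector:

    #include <map>
    #include <set>
    #include <vector>

    static std::vector<int> findUnwindCycle(const std::map<int, int> &Succ) {
      std::set<int> Visited;
      for (const auto &Entry : Succ) {
        int Node = Entry.first;
        if (Visited.count(Node))
          continue;
        std::set<int> Active; // pads on the chain we are currently walking
        while (true) {
          if (Active.count(Node)) { // the chain bit its own tail: collect it
            std::vector<int> Cycle{Node};
            for (int N = Succ.at(Node); N != Node; N = Succ.at(N))
              Cycle.push_back(N);
            return Cycle;
          }
          if (!Visited.insert(Node).second)
            break; // ran into a chain already proven cycle-free
          Active.insert(Node);
          auto It = Succ.find(Node);
          if (It == Succ.end())
            break; // chain leaves the map: no cycle on this walk
          Node = It->second;
        }
      }
      return {}; // acyclic
    }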
+ Instruction *ToPad = &I; + Value *ToPadParent = getParentPad(ToPad); for (BasicBlock *PredBB : predecessors(BB)) { TerminatorInst *TI = PredBB->getTerminator(); + Value *FromPad; if (auto *II = dyn_cast<InvokeInst>(TI)) { Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB, - "EH pad must be jumped to via an unwind edge", &I, II); - } else if (!isa<CleanupReturnInst>(TI) && !isa<CatchSwitchInst>(TI)) { - Assert(false, "EH pad must be jumped to via an unwind edge", &I, TI); + "EH pad must be jumped to via an unwind edge", ToPad, II); + if (auto Bundle = II->getOperandBundle(LLVMContext::OB_funclet)) + FromPad = Bundle->Inputs[0]; + else + FromPad = ConstantTokenNone::get(II->getContext()); + } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { + FromPad = CRI->getCleanupPad(); + Assert(FromPad != ToPadParent, "A cleanupret must exit its cleanup", CRI); + } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) { + FromPad = CSI; + } else { + Assert(false, "EH pad must be jumped to via an unwind edge", ToPad, TI); + } + + // The edge may exit from zero or more nested pads. + for (;; FromPad = getParentPad(FromPad)) { + Assert(FromPad != ToPad, + "EH pad cannot handle exceptions raised within it", FromPad, TI); + if (FromPad == ToPadParent) { + // This is a legal unwind edge. + break; + } + Assert(!isa<ConstantTokenNone>(FromPad), + "A single unwind edge may only enter one EH pad", TI); } } } @@ -2992,7 +3098,7 @@ void Verifier::visitCatchPadInst(CatchPadInst &CPI) { Assert(BB->getFirstNonPHI() == &CPI, "CatchPadInst not the first non-PHI instruction in the block.", &CPI); - visitInstruction(CPI); + visitFuncletPadInst(CPI); } void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) { @@ -3022,33 +3128,160 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) { Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad), "CleanupPadInst has an invalid parent.", &CPI); + visitFuncletPadInst(CPI); +} + +void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) { User *FirstUser = nullptr; - BasicBlock *FirstUnwindDest = nullptr; - for (User *U : CPI.users()) { - BasicBlock *UnwindDest; - if (CleanupReturnInst *CRI = dyn_cast<CleanupReturnInst>(U)) { - UnwindDest = CRI->getUnwindDest(); - } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { - continue; - } else if (CallSite(U)) { - continue; - } else { - Assert(false, "bogus cleanuppad use", &CPI); + Value *FirstUnwindPad = nullptr; + SmallVector<FuncletPadInst *, 8> Worklist({&FPI}); + while (!Worklist.empty()) { + FuncletPadInst *CurrentPad = Worklist.pop_back_val(); + Value *UnresolvedAncestorPad = nullptr; + for (User *U : CurrentPad->users()) { + BasicBlock *UnwindDest; + if (auto *CRI = dyn_cast<CleanupReturnInst>(U)) { + UnwindDest = CRI->getUnwindDest(); + } else if (auto *CSI = dyn_cast<CatchSwitchInst>(U)) { + // We allow catchswitch unwind to caller to nest + // within an outer pad that unwinds somewhere else, + // because catchswitch doesn't have a nounwind variant. + // See e.g. SimplifyCFGOpt::SimplifyUnreachable. + if (CSI->unwindsToCaller()) + continue; + UnwindDest = CSI->getUnwindDest(); + } else if (auto *II = dyn_cast<InvokeInst>(U)) { + UnwindDest = II->getUnwindDest(); + } else if (isa<CallInst>(U)) { + // Calls which don't unwind may be found inside funclet + // pads that unwind somewhere else. We don't *require* + // such calls to be annotated nounwind. 
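The parent-pad walk at the end of the hunk above is the core legality rule: an unwind edge from FromPad into ToPad is legal exactly when walking FromPad's parent chain reaches ToPad's parent before it reaches ToPad itself or falls off the top of the chain. A toy model with pads as indices and -1 standing in for ConstantTokenNone, not the IR types; the checks run in the same order as the verifier's loop:

    #include <vector>

    // Parent[P] is P's parent pad; -1 plays the role of the "none" token.
    static bool isLegalUnwindEdge(int FromPad, int ToPad,
                                  const std::vector<int> &Parent) {
      int ToPadParent = Parent[ToPad];
      for (int P = FromPad;; P = Parent[P]) {
        if (P == ToPad)
          return false; // an EH pad cannot handle exceptions raised within it
        if (P == ToPadParent)
          return true; // exits its nested pads and enters exactly one pad
        if (P == -1)
          return false; // unwound past every pad without reaching ToPad's scope
      }
    }

Note that when ToPad sits at the top level (ToPadParent == -1), the middle check fires first, which is the "single unwind edge may only enter one EH pad" case the verifier asserts on.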
+ continue; + } else if (auto *CPI = dyn_cast<CleanupPadInst>(U)) { + // The unwind dest for a cleanup can only be found by + // recursive search. Add it to the worklist, and we'll + // search for its first use that determines where it unwinds. + Worklist.push_back(CPI); + continue; + } else { + Assert(isa<CatchReturnInst>(U), "Bogus funclet pad use", U); + continue; + } + + Value *UnwindPad; + bool ExitsFPI; + if (UnwindDest) { + UnwindPad = UnwindDest->getFirstNonPHI(); + Value *UnwindParent = getParentPad(UnwindPad); + // Ignore unwind edges that don't exit CurrentPad. + if (UnwindParent == CurrentPad) + continue; + // Determine whether the original funclet pad is exited, + // and if we are scanning nested pads determine how many + // of them are exited so we can stop searching their + // children. + Value *ExitedPad = CurrentPad; + ExitsFPI = false; + do { + if (ExitedPad == &FPI) { + ExitsFPI = true; + // Now we can resolve any ancestors of CurrentPad up to + // FPI, but not including FPI since we need to make sure + // to check all direct users of FPI for consistency. + UnresolvedAncestorPad = &FPI; + break; + } + Value *ExitedParent = getParentPad(ExitedPad); + if (ExitedParent == UnwindParent) { + // ExitedPad is the ancestor-most pad which this unwind + // edge exits, so we can resolve up to it, meaning that + // ExitedParent is the first ancestor still unresolved. + UnresolvedAncestorPad = ExitedParent; + break; + } + ExitedPad = ExitedParent; + } while (!isa<ConstantTokenNone>(ExitedPad)); + } else { + // Unwinding to caller exits all pads. + UnwindPad = ConstantTokenNone::get(FPI.getContext()); + ExitsFPI = true; + UnresolvedAncestorPad = &FPI; + } + + if (ExitsFPI) { + // This unwind edge exits FPI. Make sure it agrees with other + // such edges. + if (FirstUser) { + Assert(UnwindPad == FirstUnwindPad, "Unwind edges out of a funclet " + "pad must have the same unwind " + "dest", + &FPI, U, FirstUser); + } else { + FirstUser = U; + FirstUnwindPad = UnwindPad; + // Record cleanup sibling unwinds for verifySiblingFuncletUnwinds + if (isa<CleanupPadInst>(&FPI) && !isa<ConstantTokenNone>(UnwindPad) && + getParentPad(UnwindPad) == getParentPad(&FPI)) + SiblingFuncletInfo[&FPI] = cast<TerminatorInst>(U); + } + } + // Make sure we visit all uses of FPI, but for nested pads stop as + // soon as we know where they unwind to. + if (CurrentPad != &FPI) + break; } + if (UnresolvedAncestorPad) { + if (CurrentPad == UnresolvedAncestorPad) { + // When CurrentPad is FPI itself, we don't mark it as resolved even if + // we've found an unwind edge that exits it, because we need to verify + // all direct uses of FPI. + assert(CurrentPad == &FPI); + continue; + } + // Pop off the worklist any nested pads that we've found an unwind + // destination for. The pads on the worklist are the uncles, + // great-uncles, etc. of CurrentPad. We've found an unwind destination + // for all ancestors of CurrentPad up to but not including + // UnresolvedAncestorPad. + Value *ResolvedPad = CurrentPad; + while (!Worklist.empty()) { + Value *UnclePad = Worklist.back(); + Value *AncestorPad = getParentPad(UnclePad); + // Walk ResolvedPad up the ancestor list until we either find the + // uncle's parent or the last resolved ancestor. 
+ while (ResolvedPad != AncestorPad) { + Value *ResolvedParent = getParentPad(ResolvedPad); + if (ResolvedParent == UnresolvedAncestorPad) { + break; + } + ResolvedPad = ResolvedParent; + } + // If the resolved ancestor search didn't find the uncle's parent, + // then the uncle is not yet resolved. + if (ResolvedPad != AncestorPad) + break; + // This uncle is resolved, so pop it from the worklist. + Worklist.pop_back(); + } + } + } - if (!FirstUser) { - FirstUser = U; - FirstUnwindDest = UnwindDest; - } else { - Assert( - UnwindDest == FirstUnwindDest, - "cleanupret instructions from the same cleanuppad must have the same " - "unwind destination", - FirstUser, U); + if (FirstUnwindPad) { + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FPI.getParentPad())) { + BasicBlock *SwitchUnwindDest = CatchSwitch->getUnwindDest(); + Value *SwitchUnwindPad; + if (SwitchUnwindDest) + SwitchUnwindPad = SwitchUnwindDest->getFirstNonPHI(); + else + SwitchUnwindPad = ConstantTokenNone::get(FPI.getContext()); + Assert(SwitchUnwindPad == FirstUnwindPad, + "Unwind edges out of a catch must have the same unwind dest as " + "the parent catchswitch", + &FPI, FirstUser, CatchSwitch); } } - visitInstruction(CPI); + visitInstruction(FPI); } void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) { @@ -3067,17 +3300,21 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) { "CatchSwitchInst not the first non-PHI instruction in the block.", &CatchSwitch); + auto *ParentPad = CatchSwitch.getParentPad(); + Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad), + "CatchSwitchInst has an invalid parent.", ParentPad); + if (BasicBlock *UnwindDest = CatchSwitch.getUnwindDest()) { Instruction *I = UnwindDest->getFirstNonPHI(); Assert(I->isEHPad() && !isa<LandingPadInst>(I), "CatchSwitchInst must unwind to an EH block which is not a " "landingpad.", &CatchSwitch); - } - auto *ParentPad = CatchSwitch.getParentPad(); - Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad), - "CatchSwitchInst has an invalid parent.", ParentPad); + // Record catchswitch sibling unwinds for verifySiblingFuncletUnwinds + if (getParentPad(I) == ParentPad) + SiblingFuncletInfo[&CatchSwitch] = &CatchSwitch; + } Assert(CatchSwitch.getNumHandlers() != 0, "CatchSwitchInst cannot have empty handler list", &CatchSwitch); @@ -3652,6 +3889,9 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { case Intrinsic::experimental_gc_relocate: { Assert(CS.getNumArgOperands() == 3, "wrong number of arguments", CS); + Assert(isa<PointerType>(CS.getType()->getScalarType()), + "gc.relocate must return a pointer or a vector of pointers", CS); + // Check that this relocate is correctly tied to the statepoint // This is case for relocate on the unwinding path of an invoke statepoint @@ -3734,17 +3974,20 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "'gc parameters' section of the statepoint call", CS); - // Relocated value must be a pointer type, but gc_relocate does not need to return the - // same pointer type as the relocated pointer. It can be casted to the correct type later - // if it's desired. However, they must have the same address space. + // Relocated value must be either a pointer type or vector-of-pointer type, + // but gc_relocate does not need to return the same pointer type as the + // relocated pointer. It can be casted to the correct type later if it's + // desired. 
However, they must have the same address space and 'vectorness' GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction()); - Assert(Relocate.getDerivedPtr()->getType()->isPointerTy(), + Assert(Relocate.getDerivedPtr()->getType()->getScalarType()->isPointerTy(), "gc.relocate: relocated value must be a gc pointer", CS); - // gc_relocate return type must be a pointer type, and is verified earlier in - // VerifyIntrinsicType(). - Assert(cast<PointerType>(CS.getType())->getAddressSpace() == - cast<PointerType>(Relocate.getDerivedPtr()->getType())->getAddressSpace(), + auto ResultType = CS.getType(); + auto DerivedType = Relocate.getDerivedPtr()->getType(); + Assert(ResultType->isVectorTy() == DerivedType->isVectorTy(), + "gc.relocate: vector relocates to vector and pointer to pointer", CS); + Assert(ResultType->getPointerAddressSpace() == + DerivedType->getPointerAddressSpace(), "gc.relocate: relocating a pointer shouldn't change its address space", CS); break; } diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp index 6baaaa4..66df23b 100644 --- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -92,7 +92,8 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeSROALegacyPassPass(R); initializeSROA_DTPass(R); initializeSROA_SSAUpPass(R); - initializeFunctionAttrsPass(R); + initializePostOrderFunctionAttrsPass(R); + initializeReversePostOrderFunctionAttrsPass(R); initializeGlobalsAAWrapperPassPass(R); initializeLICMPass(R); initializeMergedLoadStoreMotionPass(R); diff --git a/contrib/llvm/lib/Linker/IRMover.cpp b/contrib/llvm/lib/Linker/IRMover.cpp index 309690f..8dd59f9 100644 --- a/contrib/llvm/lib/Linker/IRMover.cpp +++ b/contrib/llvm/lib/Linker/IRMover.cpp @@ -773,6 +773,16 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV, NewGV->setLinkage(GlobalValue::ExternalWeakLinkage); NewGV->copyAttributesFrom(SGV); + + // Remove these copied constants in case this stays a declaration, since + // they point to the source module. If the def is linked the values will + // be mapped in during linkFunctionBody. + if (auto *NewF = dyn_cast<Function>(NewGV)) { + NewF->setPersonalityFn(nullptr); + NewF->setPrefixData(nullptr); + NewF->setPrologueData(nullptr); + } + return NewGV; } @@ -1211,6 +1221,18 @@ void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) { for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) { auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I)); assert(CU && "Expected valid compile unit"); + // Ensure that we don't remove subprograms referenced by DIImportedEntity. + // It is not legal to have a DIImportedEntity with a null entity or scope. + // FIXME: The DISubprogram for functions not linked in but kept due to + // being referenced by a DIImportedEntity should also have their + // IsDefinition flag unset. + SmallPtrSet<DISubprogram *, 8> ImportedEntitySPs; + for (auto *IE : CU->getImportedEntities()) { + if (auto *SP = dyn_cast<DISubprogram>(IE->getEntity())) + ImportedEntitySPs.insert(SP); + if (auto *SP = dyn_cast<DISubprogram>(IE->getScope())) + ImportedEntitySPs.insert(SP); + } for (auto *Op : CU->getSubprograms()) { // Unless we were doing function importing and deferred metadata linking, // any needed SPs should have been mapped as they would be reached @@ -1218,7 +1240,7 @@ void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) { // function bodies, or from DILocation on inlined instructions).
assert(!(ValueMap.MD()[Op] && IsMetadataLinkingPostpass) && "DISubprogram shouldn't be mapped yet"); - if (!ValueMap.MD()[Op]) + if (!ValueMap.MD()[Op] && !ImportedEntitySPs.count(Op)) UnneededSubprograms.insert(Op); } } diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp index 9de3be4..6ffa71e 100644 --- a/contrib/llvm/lib/Linker/LinkModules.cpp +++ b/contrib/llvm/lib/Linker/LinkModules.cpp @@ -65,9 +65,6 @@ class ModuleLinker { return Flags & Linker::InternalizeLinkedSymbols; } - /// Check if we should promote the given local value to global scope. - bool doPromoteLocalToGlobal(const GlobalValue *SGV); - bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest, const GlobalValue &Src); @@ -97,11 +94,11 @@ class ModuleLinker { Module &DstM = Mover.getModule(); // If the source has no name it can't link. If it has local linkage, // there is no name match-up going on. - if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(getLinkage(SrcGV))) + if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage())) return nullptr; // Otherwise see if we have a match in the destination module's symtab. - GlobalValue *DGV = DstM.getNamedValue(getName(SrcGV)); + GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName()); if (!DGV) return nullptr; @@ -116,6 +113,64 @@ class ModuleLinker { bool linkIfNeeded(GlobalValue &GV); + /// Helper method to check if we are importing from the current source + /// module. + bool isPerformingImport() const { return FunctionsToImport != nullptr; } + + /// If we are importing from the source module, checks if we should + /// import SGV as a definition, otherwise import as a declaration. + bool doImportAsDefinition(const GlobalValue *SGV); + +public: + ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags, + const FunctionInfoIndex *Index = nullptr, + DenseSet<const GlobalValue *> *FunctionsToImport = nullptr, + DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr) + : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index), + FunctionsToImport(FunctionsToImport), + ValIDToTempMDMap(ValIDToTempMDMap) { + assert((ImportIndex || !FunctionsToImport) && + "Expect a FunctionInfoIndex when importing"); + // If we have a FunctionInfoIndex but no function to import, + // then this is the primary module being compiled in a ThinLTO + // backend compilation, and we need to see if it has functions that + // may be exported to another backend compilation. + if (ImportIndex && !FunctionsToImport) + HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM); + assert((ValIDToTempMDMap || !FunctionsToImport) && + "Function importing must provide a ValIDToTempMDMap"); + } + + bool run(); +}; + +/// Class to handle necessary GlobalValue changes required by ThinLTO including +/// linkage changes and any necessary renaming. +class ThinLTOGlobalProcessing { + /// The Module which we are exporting or importing functions from. + Module &M; + + /// Function index passed in for function importing/exporting handling. + const FunctionInfoIndex *ImportIndex; + + /// Functions to import from this module, all other functions will be + /// imported as declarations instead of definitions. + DenseSet<const GlobalValue *> *FunctionsToImport; + + /// Set to true if the given FunctionInfoIndex contains any functions + /// from this source module, in which case we must conservatively assume + /// that any of its functions may be imported into another module + /// as part of a different backend compilation process. 
+ bool HasExportedFunctions = false; + + /// Populated during ThinLTO global processing with locals promoted + /// to global scope in an exporting module, which now need to be linked + /// in if calling from the ModuleLinker. + SetVector<GlobalValue *> NewExportedValues; + + /// Check if we should promote the given local value to global scope. + bool doPromoteLocalToGlobal(const GlobalValue *SGV); + /// Helper methods to check if we are importing from or potentially /// exporting from the current source module. bool isPerformingImport() const { return FunctionsToImport != nullptr; } @@ -143,32 +198,30 @@ class ModuleLinker { GlobalValue::LinkageTypes getLinkage(const GlobalValue *SGV); public: - ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags, - const FunctionInfoIndex *Index = nullptr, - DenseSet<const GlobalValue *> *FunctionsToImport = nullptr, - DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr) - : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index), - FunctionsToImport(FunctionsToImport), - ValIDToTempMDMap(ValIDToTempMDMap) { - assert((ImportIndex || !FunctionsToImport) && - "Expect a FunctionInfoIndex when importing"); + ThinLTOGlobalProcessing( + Module &M, const FunctionInfoIndex *Index, + DenseSet<const GlobalValue *> *FunctionsToImport = nullptr) + : M(M), ImportIndex(Index), FunctionsToImport(FunctionsToImport) { // If we have a FunctionInfoIndex but no function to import, // then this is the primary module being compiled in a ThinLTO // backend compilation, and we need to see if it has functions that // may be exported to another backend compilation. - if (ImportIndex && !FunctionsToImport) - HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM); - assert((ValIDToTempMDMap || !FunctionsToImport) && - "Function importing must provide a ValIDToTempMDMap"); + if (!FunctionsToImport) + HasExportedFunctions = ImportIndex->hasExportedFunctions(M); } bool run(); + + /// Access the promoted globals that are now exported and need to be linked. + SetVector<GlobalValue *> &getNewExportedValues() { return NewExportedValues; } }; } -bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) { - if (!isPerformingImport()) - return false; +/// Checks if we should import SGV as a definition, otherwise import as a +/// declaration. 
+static bool +doImportAsDefinitionImpl(const GlobalValue *SGV, + DenseSet<const GlobalValue *> *FunctionsToImport) { auto *GA = dyn_cast<GlobalAlias>(SGV); if (GA) { if (GA->hasWeakAnyLinkage()) @@ -176,7 +229,7 @@ bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) { const GlobalObject *GO = GA->getBaseObject(); if (!GO->hasLinkOnceODRLinkage()) return false; - return doImportAsDefinition(GO); + return doImportAsDefinitionImpl(GO, FunctionsToImport); } // Always import GlobalVariable definitions, except for the special // case of WeakAny which are imported as ExternalWeak declarations @@ -196,7 +249,19 @@ bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) { return false; } -bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) { +bool ThinLTOGlobalProcessing::doImportAsDefinition(const GlobalValue *SGV) { + if (!isPerformingImport()) + return false; + return doImportAsDefinitionImpl(SGV, FunctionsToImport); +} + +bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) { + if (!isPerformingImport()) + return false; + return doImportAsDefinitionImpl(SGV, FunctionsToImport); +} + +bool ThinLTOGlobalProcessing::doPromoteLocalToGlobal(const GlobalValue *SGV) { assert(SGV->hasLocalLinkage()); // Both the imported references and the original local variable must // be promoted. @@ -220,7 +285,7 @@ bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) { return true; } -std::string ModuleLinker::getName(const GlobalValue *SGV) { +std::string ThinLTOGlobalProcessing::getName(const GlobalValue *SGV) { // For locals that must be promoted to global scope, ensure that // the promoted name uniquely identifies the copy in the original module, // using the ID assigned during combined index creation. When importing, @@ -234,7 +299,8 @@ std::string ModuleLinker::getName(const GlobalValue *SGV) { return SGV->getName(); } -GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) { +GlobalValue::LinkageTypes +ThinLTOGlobalProcessing::getLinkage(const GlobalValue *SGV) { // Any local variable that is referenced by an exported function needs // to be promoted to global scope. Since we don't currently know which // functions reference which local variables/functions, we must treat @@ -298,8 +364,7 @@ GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) { // since it would cause global constructors/destructors to be // executed multiple times. This should have already been handled // by linkIfNeeded, and we will assert in shouldLinkFromSource - // if we try to import, so we simply return AppendingLinkage here - // as this helper is called more widely in getLinkedToGlobal. + // if we try to import, so we simply return AppendingLinkage. 
return GlobalValue::AppendingLinkage; case GlobalValue::InternalLinkage: @@ -652,7 +717,7 @@ void ModuleLinker::addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add) { } } -void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) { +void ThinLTOGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { if (GV.hasLocalLinkage() && (doPromoteLocalToGlobal(&GV) || isPerformingImport())) { GV.setName(getName(&GV)); @@ -660,21 +725,26 @@ void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) { if (!GV.hasLocalLinkage()) GV.setVisibility(GlobalValue::HiddenVisibility); if (isModuleExporting()) - ValuesToLink.insert(&GV); + NewExportedValues.insert(&GV); return; } GV.setLinkage(getLinkage(&GV)); } -void ModuleLinker::processGlobalsForThinLTO() { - for (GlobalVariable &GV : SrcM.globals()) +void ThinLTOGlobalProcessing::processGlobalsForThinLTO() { + for (GlobalVariable &GV : M.globals()) processGlobalForThinLTO(GV); - for (Function &SF : SrcM) + for (Function &SF : M) processGlobalForThinLTO(SF); - for (GlobalAlias &GA : SrcM.aliases()) + for (GlobalAlias &GA : M.aliases()) processGlobalForThinLTO(GA); } +bool ThinLTOGlobalProcessing::run() { + processGlobalsForThinLTO(); + return false; +} + bool ModuleLinker::run() { for (const auto &SMEC : SrcM.getComdatSymbolTable()) { const Comdat &C = SMEC.getValue(); @@ -713,7 +783,14 @@ bool ModuleLinker::run() { if (linkIfNeeded(GA)) return true; - processGlobalsForThinLTO(); + if (ImportIndex) { + ThinLTOGlobalProcessing ThinLTOProcessing(SrcM, ImportIndex, + FunctionsToImport); + if (ThinLTOProcessing.run()) + return true; + for (auto *GV : ThinLTOProcessing.getNewExportedValues()) + ValuesToLink.insert(GV); + } for (unsigned I = 0; I < ValuesToLink.size(); ++I) { GlobalValue *GV = ValuesToLink[I]; @@ -786,15 +863,9 @@ bool Linker::linkModules(Module &Dest, std::unique_ptr<Module> Src, return L.linkInModule(std::move(Src), Flags); } -std::unique_ptr<Module> -llvm::renameModuleForThinLTO(std::unique_ptr<Module> M, - const FunctionInfoIndex *Index) { - std::unique_ptr<llvm::Module> RenamedModule( - new llvm::Module(M->getModuleIdentifier(), M->getContext())); - Linker L(*RenamedModule.get()); - if (L.linkInModule(std::move(M), llvm::Linker::Flags::None, Index)) - return nullptr; - return RenamedModule; +bool llvm::renameModuleForThinLTO(Module &M, const FunctionInfoIndex *Index) { + ThinLTOGlobalProcessing ThinLTOProcessing(M, Index); + return ThinLTOProcessing.run(); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp index 0f26b38..748644b 100644 --- a/contrib/llvm/lib/MC/MCExpr.cpp +++ b/contrib/llvm/lib/MC/MCExpr.cpp @@ -300,6 +300,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_Hexagon_LD_PLT: return "LDPLT"; case VK_Hexagon_IE: return "IE"; case VK_Hexagon_IE_GOT: return "IEGOT"; + case VK_WebAssembly_FUNCTION: return "FUNCTION"; case VK_TPREL: return "tprel"; case VK_DTPREL: return "dtprel"; } diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp index 34f49ca..f86f7e4 100644 --- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp @@ -256,6 +256,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { DwarfRangesSection = Ctx->getMachOSection("__DWARF", "__debug_ranges", MachO::S_ATTR_DEBUG, SectionKind::getMetadata(), "debug_range"); + DwarfMacinfoSection = + Ctx->getMachOSection("__DWARF", "__debug_macinfo", 
MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfDebugInlineSection = Ctx->getMachOSection("__DWARF", "__debug_inlined", MachO::S_ATTR_DEBUG, SectionKind::getMetadata()); @@ -505,6 +508,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) { Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0); DwarfRangesSection = Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, "debug_range"); + DwarfMacinfoSection = + Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0); // DWARF5 Experimental Debug Info @@ -684,6 +689,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata(), "debug_range"); + DwarfMacinfoSection = Ctx->getCOFFSection( + ".debug_macinfo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfInfoDWOSection = Ctx->getCOFFSection( ".debug_info.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp index d0a7daf..972610a 100644 --- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp +++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp @@ -64,8 +64,6 @@ void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, return; } - assert(Hi->getOffset() >= Lo->getOffset() && - "Expected Hi to be greater than Lo"); EmitIntValue(Hi->getOffset() - Lo->getOffset(), Size); } diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp index a382090..a76cbdb 100644 --- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -969,9 +969,6 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, Header.PointerToSymbolTable = offset; - // FIXME: Remove the #else branch and make the #if branch unconditional once - // LLVM's self host configuration is aware of /Brepro. -#if (ENABLE_TIMESTAMPS == 1) // MS LINK expects to be able to use this timestamp to implement their // /INCREMENTAL feature. if (Asm.isIncrementalLinkerCompatible()) { @@ -980,12 +977,9 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, Now = UINT32_MAX; Header.TimeDateStamp = Now; } else { + // Have deterministic output if /INCREMENTAL isn't needed. Also matches GNU. Header.TimeDateStamp = 0; } -#else - // We want a deterministic output. It looks like GNU as also writes 0 in here. - Header.TimeDateStamp = 0; -#endif // Write it all to disk... 
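The WinCOFFObjectWriter hunk above makes the timestamp policy unconditional. Restated as a freestanding helper (the field handling mirrors the hunk; the function name is invented):

    #include <cstdint>
    #include <ctime>

    // MS LINK's /INCREMENTAL needs a real timestamp; otherwise write 0 so the
    // object file is deterministic (GNU as writes 0 here as well).
    uint32_t coffTimeDateStamp(bool IncrementalLinkerCompatible) {
      if (!IncrementalLinkerCompatible)
        return 0;
      std::time_t Now = std::time(nullptr);
      if (Now < 0 || static_cast<uint64_t>(Now) > UINT32_MAX)
        return UINT32_MAX; // out-of-range clock: fall back to a sentinel
      return static_cast<uint32_t>(Now);
    }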
WriteFileHeader(Header); diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index 1f21117..4cd6aff 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -1336,6 +1336,30 @@ ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { return std::error_code(); } +std::error_code ExportDirectoryEntryRef::isForwarder(bool &Result) const { + const data_directory *DataEntry; + if (auto EC = OwningObject->getDataDirectory(COFF::EXPORT_TABLE, DataEntry)) + return EC; + uint32_t RVA; + if (auto EC = getExportRVA(RVA)) + return EC; + uint32_t Begin = DataEntry->RelativeVirtualAddress; + uint32_t End = DataEntry->RelativeVirtualAddress + DataEntry->Size; + Result = (Begin <= RVA && RVA < End); + return std::error_code(); +} + +std::error_code ExportDirectoryEntryRef::getForwardTo(StringRef &Result) const { + uint32_t RVA; + if (auto EC = getExportRVA(RVA)) + return EC; + uintptr_t IntPtr = 0; + if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); +} + bool ImportedSymbolRef:: operator==(const ImportedSymbolRef &Other) const { return Entry32 == Other.Entry32 && Entry64 == Other.Entry64 diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp index 62c27cc..12b772d 100644 --- a/contrib/llvm/lib/Object/ELF.cpp +++ b/contrib/llvm/lib/Object/ELF.cpp @@ -91,6 +91,13 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { break; } break; + case ELF::EM_WEBASSEMBLY: + switch (Type) { +#include "llvm/Support/ELFRelocs/WebAssembly.def" + default: + break; + } + break; default: break; } diff --git a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp index 55c0fb4..f5d477b 100644 --- a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp +++ b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp @@ -517,6 +517,6 @@ class CoverageMappingErrorCategoryType : public std::error_category { static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory; -const std::error_category &llvm::coveragemap_category() { +const std::error_category &llvm::coverage::coveragemap_category() { return *ErrorCategory; } diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp index 32c692d..89e1cf4 100644 --- a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp +++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp @@ -319,21 +319,14 @@ static std::error_code readCoverageMappingData( if (Buf + sizeof(CovMapHeader) > End) return coveragemap_error::malformed; auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf); - uint32_t NRecords = - endian::byte_swap<uint32_t, Endian>(CovHeader->NRecords); - uint32_t FilenamesSize = - endian::byte_swap<uint32_t, Endian>(CovHeader->FilenamesSize); - uint32_t CoverageSize = - endian::byte_swap<uint32_t, Endian>(CovHeader->CoverageSize); - uint32_t Version = endian::byte_swap<uint32_t, Endian>(CovHeader->Version); + uint32_t NRecords = CovHeader->getNRecords<Endian>(); + uint32_t FilenamesSize = CovHeader->getFilenamesSize<Endian>(); + uint32_t CoverageSize = CovHeader->getCoverageSize<Endian>(); + uint32_t Version = CovHeader->getVersion<Endian>(); Buf = reinterpret_cast<const char *>(++CovHeader); - switch (Version) { - case CoverageMappingVersion1: - break; - default: + if (Version > 
coverage::CoverageMappingCurrentVersion) return coveragemap_error::unsupported_version; - } // Skip past the function records, saving the start and end for later. const char *FunBuf = Buf; @@ -364,11 +357,8 @@ static std::error_code readCoverageMappingData( reinterpret_cast<const coverage::CovMapFunctionRecord<T> *>(FunBuf); while ((const char *)CFR < FunEnd) { // Read the function information - T NamePtr = endian::byte_swap<T, Endian>(CFR->NamePtr); - uint32_t NameSize = endian::byte_swap<uint32_t, Endian>(CFR->NameSize); - uint32_t DataSize = endian::byte_swap<uint32_t, Endian>(CFR->DataSize); - uint64_t FuncHash = endian::byte_swap<uint64_t, Endian>(CFR->FuncHash); - CFR++; + uint32_t DataSize = CFR->template getDataSize<Endian>(); + uint64_t FuncHash = CFR->template getFuncHash<Endian>(); // Now use that to read the coverage data. if (CovBuf + DataSize > CovEnd) @@ -379,16 +369,18 @@ static std::error_code readCoverageMappingData( // Ignore this record if we already have a record that points to the same // function name. This is useful to ignore the redundant records for the // functions with ODR linkage. - if (!UniqueFunctionMappingData.insert(NamePtr).second) + T NameRef = CFR->template getFuncNameRef<Endian>(); + if (!UniqueFunctionMappingData.insert(NameRef).second) continue; - // Finally, grab the name and create a record. - StringRef FuncName = ProfileNames.getFuncName(NamePtr, NameSize); - if (NameSize && FuncName.empty()) - return coveragemap_error::malformed; + StringRef FuncName; + if (std::error_code EC = + CFR->template getFuncName<Endian>(ProfileNames, FuncName)) + return EC; Records.push_back(BinaryCoverageReader::ProfileMappingRecord( CoverageMappingVersion(Version), FuncName, FuncHash, Mapping, FilenamesBegin, Filenames.size() - FilenamesBegin)); + CFR++; } } diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp index 027f0f7..d677763 100644 --- a/contrib/llvm/lib/ProfileData/InstrProf.cpp +++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp @@ -257,9 +257,8 @@ int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { return 0; } -instrprof_error -InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, - uint64_t Weight) { +instrprof_error InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input, + uint64_t Weight) { this->sortByTargetValues(); Input.sortByTargetValues(); auto I = ValueData.begin(); @@ -270,14 +269,8 @@ InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, while (I != IE && I->Value < J->Value) ++I; if (I != IE && I->Value == J->Value) { - uint64_t JCount = J->Count; bool Overflowed; - if (Weight > 1) { - JCount = SaturatingMultiply(JCount, Weight, &Overflowed); - if (Overflowed) - Result = instrprof_error::counter_overflow; - } - I->Count = SaturatingAdd(I->Count, JCount, &Overflowed); + I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed); if (Overflowed) Result = instrprof_error::counter_overflow; ++I; @@ -288,6 +281,17 @@ InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, return Result; } +instrprof_error InstrProfValueSiteRecord::scale(uint64_t Weight) { + instrprof_error Result = instrprof_error::success; + for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) { + bool Overflowed; + I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + return Result; +} + // Merge Value Profile data from Src record to this 
record for ValueKind. // Scale merged value counts by \p Weight. instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind, @@ -303,8 +307,7 @@ instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind, Src.getValueSitesForKind(ValueKind); instrprof_error Result = instrprof_error::success; for (uint32_t I = 0; I < ThisNumValueSites; I++) - MergeResult(Result, - ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight)); + MergeResult(Result, ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight)); return Result; } @@ -319,13 +322,8 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { bool Overflowed; - uint64_t OtherCount = Other.Counts[I]; - if (Weight > 1) { - OtherCount = SaturatingMultiply(OtherCount, Weight, &Overflowed); - if (Overflowed) - Result = instrprof_error::counter_overflow; - } - Counts[I] = SaturatingAdd(Counts[I], OtherCount, &Overflowed); + Counts[I] = + SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed); if (Overflowed) Result = instrprof_error::counter_overflow; } @@ -336,6 +334,32 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, return Result; } +instrprof_error InstrProfRecord::scaleValueProfData(uint32_t ValueKind, + uint64_t Weight) { + uint32_t ThisNumValueSites = getNumValueSites(ValueKind); + std::vector<InstrProfValueSiteRecord> &ThisSiteRecords = + getValueSitesForKind(ValueKind); + instrprof_error Result = instrprof_error::success; + for (uint32_t I = 0; I < ThisNumValueSites; I++) + MergeResult(Result, ThisSiteRecords[I].scale(Weight)); + return Result; +} + +instrprof_error InstrProfRecord::scale(uint64_t Weight) { + instrprof_error Result = instrprof_error::success; + for (auto &Count : this->Counts) { + bool Overflowed; + Count = SaturatingMultiply(Count, Weight, &Overflowed); + if (Overflowed && Result == instrprof_error::success) { + Result = instrprof_error::counter_overflow; + } + } + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + MergeResult(Result, scaleValueProfData(Kind, Weight)); + + return Result; +} + // Map indirect call target name hash to name string. uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, ValueMapType *ValueMap) { diff --git a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp index 9bb03e1..f522724 100644 --- a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -104,27 +104,21 @@ std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord())); InstrProfRecord &Dest = Where->second; - instrprof_error Result; + instrprof_error Result = instrprof_error::success; if (NewFunc) { // We've never seen a function with this name and hash, add it. Dest = std::move(I); // Fix up the name to avoid dangling reference. Dest.Name = FunctionData.find(Dest.Name)->getKey(); - Result = instrprof_error::success; - if (Weight > 1) { - for (auto &Count : Dest.Counts) { - bool Overflowed; - Count = SaturatingMultiply(Count, Weight, &Overflowed); - if (Overflowed && Result == instrprof_error::success) { - Result = instrprof_error::counter_overflow; - } - } - } + if (Weight > 1) + Result = Dest.scale(Weight); } else { // We're updating a function we've seen before. Result = Dest.merge(I, Weight); } + Dest.sortValueData(); + // We keep track of the max function count as we go for simplicity. 
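The InstrProf hunks above collapse the separate SaturatingMultiply/SaturatingAdd steps into a single SaturatingMultiplyAdd call. A freestanding uint64_t equivalent, assuming the GCC/Clang overflow builtins (a sketch of the semantics, not LLVM's MathExtras implementation):

    #include <cstdint>

    // Computes A*B + C, clamping to UINT64_MAX on overflow and reporting it,
    // as used by the weighted merge paths above.
    uint64_t saturatingMultiplyAdd(uint64_t A, uint64_t B, uint64_t C,
                                   bool *Overflowed) {
      uint64_t Mul, Res;
      bool Ovf = __builtin_mul_overflow(A, B, &Mul);
      Ovf |= __builtin_add_overflow(Mul, C, &Res);
      if (Ovf)
        Res = UINT64_MAX;
      *Overflowed = Ovf;
      return Res;
    }

Saturating instead of wrapping keeps a weighted merge from silently corrupting counters, while the reported flag still lets callers surface instrprof_error::counter_overflow.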
// Update this statistic no matter the result of the merge. if (Dest.Counts[0] > MaxFunctionCount) diff --git a/contrib/llvm/lib/Support/Debug.cpp b/contrib/llvm/lib/Support/Debug.cpp index 47751fc..323d532 100644 --- a/contrib/llvm/lib/Support/Debug.cpp +++ b/contrib/llvm/lib/Support/Debug.cpp @@ -95,7 +95,10 @@ struct DebugOnlyOpt { if (Val.empty()) return; DebugFlag = true; - CurrentDebugType->push_back(Val); + SmallVector<StringRef,8> dbgTypes; + StringRef(Val).split(dbgTypes, ',', -1, false); + for (auto dbgType : dbgTypes) + CurrentDebugType->push_back(dbgType); } }; @@ -104,10 +107,9 @@ struct DebugOnlyOpt { static DebugOnlyOpt DebugOnlyOptLoc; static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> > -DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"), +DebugOnly("debug-only", cl::desc("Enable a specific type of debug output (comma separated list of types)"), cl::Hidden, cl::ZeroOrMore, cl::value_desc("debug string"), cl::location(DebugOnlyOptLoc), cl::ValueRequired); - // Signal handlers - dump debug output on termination. static void debug_user_sig_handler(void *Cookie) { // This is a bit sneaky. Since this is under #ifndef NDEBUG, we diff --git a/contrib/llvm/lib/Support/IntEqClasses.cpp b/contrib/llvm/lib/Support/IntEqClasses.cpp index 1134495..ff21357 100644 --- a/contrib/llvm/lib/Support/IntEqClasses.cpp +++ b/contrib/llvm/lib/Support/IntEqClasses.cpp @@ -29,7 +29,7 @@ void IntEqClasses::grow(unsigned N) { EC.push_back(EC.size()); } -void IntEqClasses::join(unsigned a, unsigned b) { +unsigned IntEqClasses::join(unsigned a, unsigned b) { assert(NumClasses == 0 && "join() called after compress()."); unsigned eca = EC[a]; unsigned ecb = EC[b]; @@ -41,6 +41,8 @@ void IntEqClasses::join(unsigned a, unsigned b) { EC[b] = eca, b = ecb, ecb = EC[b]; else EC[a] = ecb, a = eca, eca = EC[a]; + + return eca; } unsigned IntEqClasses::findLeader(unsigned a) const { diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp index 3bb1116..0e5d3ac 100644 --- a/contrib/llvm/lib/Support/Triple.cpp +++ b/contrib/llvm/lib/Support/Triple.cpp @@ -1154,8 +1154,6 @@ Triple Triple::get32BitArchVariant() const { Triple T(*this); switch (getArch()) { case Triple::UnknownArch: - case Triple::aarch64: - case Triple::aarch64_be: case Triple::amdgcn: case Triple::avr: case Triple::bpfel: @@ -1191,17 +1189,19 @@ Triple Triple::get32BitArchVariant() const { // Already 32-bit. 
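The Debug.cpp hunk above lets -debug-only take a comma-separated list, e.g. -debug-only=isel,regalloc (the pass names are only examples). The splitting matches this standalone equivalent of StringRef::split with KeepEmpty=false:

    #include <string>
    #include <vector>

    // splitDebugTypes("isel,regalloc") -> {"isel", "regalloc"}; empty pieces
    // between consecutive commas are dropped, as in the hunk above.
    std::vector<std::string> splitDebugTypes(const std::string &Val) {
      std::vector<std::string> Types;
      size_t Begin = 0;
      while (Begin <= Val.size()) {
        size_t End = Val.find(',', Begin);
        if (End == std::string::npos)
          End = Val.size();
        if (End > Begin) // KeepEmpty == false
          Types.push_back(Val.substr(Begin, End - Begin));
        Begin = End + 1;
      }
      return Types;
    }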
break; - case Triple::le64: T.setArch(Triple::le32); break; - case Triple::mips64: T.setArch(Triple::mips); break; - case Triple::mips64el: T.setArch(Triple::mipsel); break; - case Triple::nvptx64: T.setArch(Triple::nvptx); break; - case Triple::ppc64: T.setArch(Triple::ppc); break; - case Triple::sparcv9: T.setArch(Triple::sparc); break; - case Triple::x86_64: T.setArch(Triple::x86); break; - case Triple::amdil64: T.setArch(Triple::amdil); break; - case Triple::hsail64: T.setArch(Triple::hsail); break; - case Triple::spir64: T.setArch(Triple::spir); break; - case Triple::wasm64: T.setArch(Triple::wasm32); break; + case Triple::aarch64: T.setArch(Triple::arm); break; + case Triple::aarch64_be: T.setArch(Triple::armeb); break; + case Triple::le64: T.setArch(Triple::le32); break; + case Triple::mips64: T.setArch(Triple::mips); break; + case Triple::mips64el: T.setArch(Triple::mipsel); break; + case Triple::nvptx64: T.setArch(Triple::nvptx); break; + case Triple::ppc64: T.setArch(Triple::ppc); break; + case Triple::sparcv9: T.setArch(Triple::sparc); break; + case Triple::x86_64: T.setArch(Triple::x86); break; + case Triple::amdil64: T.setArch(Triple::amdil); break; + case Triple::hsail64: T.setArch(Triple::hsail); break; + case Triple::spir64: T.setArch(Triple::spir); break; + case Triple::wasm64: T.setArch(Triple::wasm32); break; } return T; } @@ -1210,16 +1210,12 @@ Triple Triple::get64BitArchVariant() const { Triple T(*this); switch (getArch()) { case Triple::UnknownArch: - case Triple::arm: - case Triple::armeb: case Triple::avr: case Triple::hexagon: case Triple::kalimba: case Triple::msp430: case Triple::r600: case Triple::tce: - case Triple::thumb: - case Triple::thumbeb: case Triple::xcore: case Triple::sparcel: case Triple::shave: @@ -1247,17 +1243,21 @@ Triple Triple::get64BitArchVariant() const { // Already 64-bit. 
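With this Triple.cpp hunk (and its 64-bit counterpart just below), the arch-variant queries map ARM and AArch64 onto each other instead of giving up. Usage against the modified API:

    #include "llvm/ADT/Triple.h"

    void example() {
      llvm::Triple T("aarch64-unknown-linux-gnu");
      llvm::Triple T32 = T.get32BitArchVariant(); // arch is now Triple::arm
      llvm::Triple U("thumbv7-none-linux-gnueabi");
      llvm::Triple U64 = U.get64BitArchVariant(); // arch is now Triple::aarch64
      (void)T32;
      (void)U64;
    }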
break; - case Triple::le32: T.setArch(Triple::le64); break; - case Triple::mips: T.setArch(Triple::mips64); break; - case Triple::mipsel: T.setArch(Triple::mips64el); break; - case Triple::nvptx: T.setArch(Triple::nvptx64); break; - case Triple::ppc: T.setArch(Triple::ppc64); break; - case Triple::sparc: T.setArch(Triple::sparcv9); break; - case Triple::x86: T.setArch(Triple::x86_64); break; - case Triple::amdil: T.setArch(Triple::amdil64); break; - case Triple::hsail: T.setArch(Triple::hsail64); break; - case Triple::spir: T.setArch(Triple::spir64); break; - case Triple::wasm32: T.setArch(Triple::wasm64); break; + case Triple::arm: T.setArch(Triple::aarch64); break; + case Triple::armeb: T.setArch(Triple::aarch64_be); break; + case Triple::le32: T.setArch(Triple::le64); break; + case Triple::mips: T.setArch(Triple::mips64); break; + case Triple::mipsel: T.setArch(Triple::mips64el); break; + case Triple::nvptx: T.setArch(Triple::nvptx64); break; + case Triple::ppc: T.setArch(Triple::ppc64); break; + case Triple::sparc: T.setArch(Triple::sparcv9); break; + case Triple::x86: T.setArch(Triple::x86_64); break; + case Triple::amdil: T.setArch(Triple::amdil64); break; + case Triple::hsail: T.setArch(Triple::hsail64); break; + case Triple::spir: T.setArch(Triple::spir64); break; + case Triple::thumb: T.setArch(Triple::aarch64); break; + case Triple::thumbeb: T.setArch(Triple::aarch64_be); break; + case Triple::wasm32: T.setArch(Triple::wasm64); break; } return T; } diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc index 4e48412..5ef77b1 100644 --- a/contrib/llvm/lib/Support/Windows/Path.inc +++ b/contrib/llvm/lib/Support/Windows/Path.inc @@ -38,6 +38,7 @@ typedef int errno_t; #ifdef _MSC_VER # pragma comment(lib, "advapi32.lib") // This provides CryptAcquireContextW. +# pragma comment(lib, "ole32.lib") // This provides CoTaskMemFree #endif using namespace llvm; diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc index d109a66..f40ca72 100644 --- a/contrib/llvm/lib/Support/Windows/Signals.inc +++ b/contrib/llvm/lib/Support/Windows/Signals.inc @@ -405,7 +405,10 @@ static void RegisterHandler() { // If we cannot load up the APIs (which would be unexpected as they should // exist on every version of Windows we support), we will bail out since // there would be nothing to report. - assert(load64BitDebugHelp() && "These APIs should always be available"); + if (!load64BitDebugHelp()) { + assert(false && "These APIs should always be available"); + return; + } if (RegisteredUnhandledExceptionFilter) { EnterCriticalSection(&CriticalSection); diff --git a/contrib/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm/lib/Support/Windows/WindowsSupport.h index c65e314..60490f2 100644 --- a/contrib/llvm/lib/Support/Windows/WindowsSupport.h +++ b/contrib/llvm/lib/Support/Windows/WindowsSupport.h @@ -47,20 +47,27 @@ #include <string> #include <vector> -#if !defined(__CYGWIN__) && !defined(__MINGW32__) -#include <VersionHelpers.h> -#else -// Cygwin does not have the IsWindows8OrGreater() API. -// Some version of mingw does not have the API either. -inline bool IsWindows8OrGreater() { - OSVERSIONINFO osvi = {}; +/// Determines if the program is running on Windows 8 or newer. This +/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended +/// to supersede raw calls to GetVersionEx. Old SDKs, Cygwin, and MinGW don't +/// yet have VersionHelpers.h, so we have our own helper.
+inline bool RunningWindows8OrGreater() { + // Windows 8 is version 6.2, service pack 0. + OSVERSIONINFOEXW osvi = {}; osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); - if (!::GetVersionEx(&osvi)) - return false; - return (osvi.dwMajorVersion > 6 || - (osvi.dwMajorVersion == 6 && osvi.dwMinorVersion >= 2)); + osvi.dwMajorVersion = 6; + osvi.dwMinorVersion = 2; + osvi.wServicePackMajor = 0; + + DWORDLONG Mask = 0; + Mask = VerSetConditionMask(Mask, VER_MAJORVERSION, VER_GREATER_EQUAL); + Mask = VerSetConditionMask(Mask, VER_MINORVERSION, VER_GREATER_EQUAL); + Mask = VerSetConditionMask(Mask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL); + + return VerifyVersionInfoW(&osvi, VER_MAJORVERSION | VER_MINORVERSION | + VER_SERVICEPACKMAJOR, + Mask) != FALSE; } -#endif // __CYGWIN__ inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) { if (!ErrMsg) diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp index 57162dc..15813fd 100644 --- a/contrib/llvm/lib/Support/raw_ostream.cpp +++ b/contrib/llvm/lib/Support/raw_ostream.cpp @@ -577,7 +577,7 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { // Writing a large size of output to Windows console returns ENOMEM. It seems // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and // the latter has a size limit (66000 bytes or less, depending on heap usage). - bool ShouldWriteInChunks = !!::_isatty(FD) && !IsWindows8OrGreater(); + bool ShouldWriteInChunks = !!::_isatty(FD) && !RunningWindows8OrGreater(); #endif do { diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 3ef3c8b..f398117 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2487,15 +2487,36 @@ static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, return true; } -/// Return true when there is potentially a faster code sequence -/// for an instruction chain ending in \p Root. All potential patterns are -/// listed -/// in the \p Pattern vector. Pattern should be sorted in priority order since -/// the pattern evaluator stops checking as soon as it finds a faster sequence. +// TODO: There are many more machine instruction opcodes to match: +// 1. Other data types (integer, vectors) +// 2. Other math / logic operations (xor, or) +// 3. Other forms of the same operation (intrinsics and other variants) +bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { + switch (Inst.getOpcode()) { + case AArch64::FADDDrr: + case AArch64::FADDSrr: + case AArch64::FADDv2f32: + case AArch64::FADDv2f64: + case AArch64::FADDv4f32: + case AArch64::FMULDrr: + case AArch64::FMULSrr: + case AArch64::FMULX32: + case AArch64::FMULX64: + case AArch64::FMULXv2f32: + case AArch64::FMULXv2f64: + case AArch64::FMULXv4f32: + case AArch64::FMULv2f32: + case AArch64::FMULv2f64: + case AArch64::FMULv4f32: + return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; + default: + return false; + } +} -bool AArch64InstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) const { +/// Find instructions that can be turned into madd. 
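getMaddPatterns, which follows, only spots the fusion opportunity; in source terms the shape it looks for is simply a multiply feeding an add:

    // AArch64 can lower this to a single MADD instruction rather than a
    // separate MUL and ADD, which is what the patterns below detect.
    long madd(long a, long b, long c) { return a * b + c; }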
+static bool getMaddPatterns(MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) { unsigned Opc = Root.getOpcode(); MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; @@ -2600,6 +2621,20 @@ bool AArch64InstrInfo::getMachineCombinerPatterns( return Found; } +/// Return true when there is potentially a faster code sequence for an +/// instruction chain ending in \p Root. All potential patterns are listed in +/// the \p Pattern vector. Pattern should be sorted in priority order since the +/// pattern evaluator stops checking as soon as it finds a faster sequence. + +bool AArch64InstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const { + if (getMaddPatterns(Root, Patterns)) + return true; + + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); +} + /// genMadd - Generate madd instruction and combine mul and add. /// Example: /// MUL I=A,B,0 @@ -2713,8 +2748,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence( unsigned Opc; switch (Pattern) { default: - // signal error. - break; + // Reassociate instructions. + TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, + DelInstrs, InstrIdxForVirtReg); + return; case MachineCombinerPattern::MULADDW_OP1: case MachineCombinerPattern::MULADDX_OP1: // MUL I=A,B,0 diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h index ae02822..b5bb446 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -169,7 +169,9 @@ public: bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns) const override; - + /// Return true when Inst is associative and commutative so that it can be + /// reassociated. 
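The isAssociativeAndCommutative override declared next feeds TargetInstrInfo's generic reassociation and is gated on UnsafeFPMath because FP add and mul are not associative. In scalar terms, the rewrite trades a serial chain for a balanced tree:

    // Three dependent fadds: each one waits on the previous result.
    float serial(float a, float b, float c, float d) {
      return ((a + b) + c) + d;
    }

    // Two independent fadds plus one more: a shorter critical path. For
    // floating point this is only legal under unsafe-fp-math.
    float balanced(float a, float b, float c, float d) {
      return (a + b) + (c + d);
    }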
+ bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; /// When getMachineCombinerPatterns() finds patterns, this function generates /// the instructions that could replace the original code sequence void genAlternativeCodeSequence( diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h index 8c3cb56..5d00e1c 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -20,8 +20,10 @@ class AMDGPUInstrPrinter; class AMDGPUSubtarget; class AMDGPUTargetMachine; class FunctionPass; +class MachineSchedContext; class MCAsmInfo; class raw_ostream; +class ScheduleDAGInstrs; class Target; class TargetMachine; @@ -49,6 +51,8 @@ FunctionPass *createSIFixSGPRLiveRangesPass(); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSIInsertWaits(TargetMachine &tm); +ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C); + ModulePass *createAMDGPUAnnotateKernelFeaturesPass(); void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); extern char &AMDGPUAnnotateKernelFeaturesID; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 9c37902..1239dfb2 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -91,6 +91,25 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)) {} +void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; + + // Need to construct an MCSubtargetInfo here in case we have no functions + // in the module. + std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple().str(), TM.getTargetCPU(), + TM.getTargetFeatureString())); + + AMDGPUTargetStreamer *TS = + static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); + + TS->EmitDirectiveHSACodeObjectVersion(1, 0); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits()); + TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, + "AMD", "AMDGPU"); +} + void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; @@ -148,11 +167,15 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { TS->EmitAMDGPUHsaProgramScopeGlobal(GV->getName()); } + MCSymbolELF *GVSym = cast<MCSymbolELF>(getSymbol(GV)); const DataLayout &DL = getDataLayout(); + + // Emit the size + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + OutStreamer->emitELFSize(GVSym, MCConstantExpr::create(Size, OutContext)); OutStreamer->PushSection(); OutStreamer->SwitchSection( getObjFileLowering().SectionForGlobal(GV, *Mang, TM)); - MCSymbol *GVSym = getSymbol(GV); const Constant *C = GV->getInitializer(); OutStreamer->EmitLabel(GVSym); EmitGlobalConstant(DL, C); @@ -178,13 +201,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (!STM.isAmdHsaOS()) { EmitProgramInfoSI(MF, KernelInfo); } - // Emit directives - AMDGPUTargetStreamer *TS = - static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); - TS->EmitDirectiveHSACodeObjectVersion(1, 0); - AMDGPU::IsaVersion ISA = STM.getIsaVersion(); - TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, - "AMD", "AMDGPU"); } else { EmitProgramInfoR600(MF); } @@ 
-417,16 +433,24 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } } - if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) { - MaxSGPR += 2; + unsigned ExtraSGPRs = 0; - if (FlatUsed) - MaxSGPR += 2; + if (VCCUsed) + ExtraSGPRs = 2; + if (STM.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (FlatUsed) + ExtraSGPRs = 4; + } else { if (STM.isXNACKEnabled()) - MaxSGPR += 2; + ExtraSGPRs = 4; + + if (FlatUsed) + ExtraSGPRs = 6; } + MaxSGPR += ExtraSGPRs; + // We found the maximum register index. They start at 0, so add one to get the // number of registers. ProgInfo.NumVGPR = MaxVGPR + 1; @@ -563,7 +587,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4); OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); - OutStreamer->EmitIntValue(MFI->PSInputAddr, 4); + OutStreamer->EmitIntValue(MFI->PSInputEna, 4); + OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); + OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4); } } diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 817cbfc..99d4091 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -103,6 +103,8 @@ public: void EmitGlobalVariable(const GlobalVariable *GV) override; + void EmitStartOfAsmFile(Module &M) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 6ffa7a0..b0db261 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -20,28 +20,83 @@ def CC_SI : CallingConv<[ CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, - SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21 + SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, + SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, + SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39 ]>>>, CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow< - [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ], - [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ] + [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14, + SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30, + SGPR32, SGPR34, SGPR36, SGPR38 ], + [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15, + SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31, + SGPR33, SGPR35, SGPR37, SGPR39 ] >>>, + // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs. 
CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, - VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31 + VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31, + VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39, + VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47, + VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55, + VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63, + VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71, + VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79, + VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87, + VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95, + VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103, + VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111, + VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119, + VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127, + VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135 ]>>>, CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow< - [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ], - [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ] + [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14, + SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30, + SGPR32, SGPR34, SGPR36, SGPR38 ], + [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15, + SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31, + SGPR33, SGPR35, SGPR37, SGPR39 ] >>> ]>; +def RetCC_SI : CallingConv<[ + CCIfType<[i32] , CCAssignToReg<[ + SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, + SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, + SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, + SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, + SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39 + ]>>, + + // 32*4 + 4 is the minimum for a fetch shader with 32 outputs. 
+ CCIfType<[f32] , CCAssignToReg<[ + VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, + VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, + VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, + VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31, + VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39, + VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47, + VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55, + VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63, + VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71, + VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79, + VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87, + VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95, + VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103, + VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111, + VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119, + VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127, + VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135 + ]>> +]>; + // Calling convention for R600 def CC_R600 : CallingConv<[ CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[ diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 222f631..1a59a46 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -282,12 +282,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::SMAX, MVT::i32, Legal); setOperationAction(ISD::UMAX, MVT::i32, Legal); - if (!Subtarget->hasFFBH()) + if (Subtarget->hasFFBH()) + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); + else setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); if (!Subtarget->hasFFBL()) setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + + setOperationAction(ISD::CTLZ, MVT::i64, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); + static const MVT::SimpleValueType VectorIntTypes[] = { MVT::v2i32, MVT::v4i32 }; @@ -565,6 +572,12 @@ void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, State.AnalyzeFormalArguments(Ins, CC_AMDGPU); } +void AMDGPUTargetLowering::AnalyzeReturn(CCState &State, + const SmallVectorImpl<ISD::OutputArg> &Outs) const { + + State.AnalyzeReturn(Outs, RetCC_SI); +} + SDValue AMDGPUTargetLowering::LowerReturn( SDValue Chain, CallingConv::ID CallConv, @@ -633,6 +646,9 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + return LowerCTLZ(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); } return Op; @@ -2159,6 +2175,145 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } +SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF; + + if (ZeroUndef && Src.getValueType() == MVT::i32) + return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Src); + + SDValue 
Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src); + + const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); + const SDValue One = DAG.getConstant(1, SL, MVT::i32); + + SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); + SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); + + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), MVT::i32); + + SDValue Hi0 = DAG.getSetCC(SL, SetCCVT, Hi, Zero, ISD::SETEQ); + + SDValue CtlzLo = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Lo); + SDValue CtlzHi = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Hi); + + const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32); + SDValue Add = DAG.getNode(ISD::ADD, SL, MVT::i32, CtlzLo, Bits32); + + // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x)) + SDValue NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0, Add, CtlzHi); + + if (!ZeroUndef) { + // Test if the full 64-bit input is zero. + + // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32, + // which we probably don't want. + SDValue Lo0 = DAG.getSetCC(SL, SetCCVT, Lo, Zero, ISD::SETEQ); + SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0, Hi0); + + // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction + // with the same cycles, otherwise it is slower. + // SDValue SrcIsZero = DAG.getSetCC(SL, SetCCVT, Src, + // DAG.getConstant(0, SL, MVT::i64), ISD::SETEQ); + + const SDValue Bits32 = DAG.getConstant(64, SL, MVT::i32); + + // The instruction returns -1 for 0 input, but the defined intrinsic + // behavior is to return the number of bits. + NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32, + SrcIsZero, Bits32, NewCtlz); + } + + return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewCtlz); +} + +SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, + bool Signed) const { + // Unsigned + // cul2f(ulong u) + //{ + // uint lz = clz(u); + // uint e = (u != 0) ? 127U + 63U - lz : 0; + // u = (u << lz) & 0x7fffffffffffffffUL; + // ulong t = u & 0xffffffffffUL; + // uint v = (e << 23) | (uint)(u >> 40); + // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U); + // return as_float(v + r); + //} + // Signed + // cl2f(long l) + //{ + // long s = l >> 63; + // float r = cul2f((l + s) ^ s); + // return s ? 
-r : r; + //} + + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + SDValue L = Src; + + SDValue S; + if (Signed) { + const SDValue SignBit = DAG.getConstant(63, SL, MVT::i64); + S = DAG.getNode(ISD::SRA, SL, MVT::i64, L, SignBit); + + SDValue LPlusS = DAG.getNode(ISD::ADD, SL, MVT::i64, L, S); + L = DAG.getNode(ISD::XOR, SL, MVT::i64, LPlusS, S); + } + + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), MVT::f32); + + + SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32); + SDValue ZeroI64 = DAG.getConstant(0, SL, MVT::i64); + SDValue LZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i64, L); + LZ = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LZ); + + SDValue K = DAG.getConstant(127U + 63U, SL, MVT::i32); + SDValue E = DAG.getSelect(SL, MVT::i32, + DAG.getSetCC(SL, SetCCVT, L, ZeroI64, ISD::SETNE), + DAG.getNode(ISD::SUB, SL, MVT::i32, K, LZ), + ZeroI32); + + SDValue U = DAG.getNode(ISD::AND, SL, MVT::i64, + DAG.getNode(ISD::SHL, SL, MVT::i64, L, LZ), + DAG.getConstant((-1ULL) >> 1, SL, MVT::i64)); + + SDValue T = DAG.getNode(ISD::AND, SL, MVT::i64, U, + DAG.getConstant(0xffffffffffULL, SL, MVT::i64)); + + SDValue UShl = DAG.getNode(ISD::SRL, SL, MVT::i64, + U, DAG.getConstant(40, SL, MVT::i64)); + + SDValue V = DAG.getNode(ISD::OR, SL, MVT::i32, + DAG.getNode(ISD::SHL, SL, MVT::i32, E, DAG.getConstant(23, SL, MVT::i32)), + DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, UShl)); + + SDValue C = DAG.getConstant(0x8000000000ULL, SL, MVT::i64); + SDValue RCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETUGT); + SDValue TCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETEQ); + + SDValue One = DAG.getConstant(1, SL, MVT::i32); + + SDValue VTrunc1 = DAG.getNode(ISD::AND, SL, MVT::i32, V, One); + + SDValue R = DAG.getSelect(SL, MVT::i32, + RCmp, + One, + DAG.getSelect(SL, MVT::i32, TCmp, VTrunc1, ZeroI32)); + R = DAG.getNode(ISD::ADD, SL, MVT::i32, V, R); + R = DAG.getNode(ISD::BITCAST, SL, MVT::f32, R); + + if (!Signed) + return R; + + SDValue RNeg = DAG.getNode(ISD::FNEG, SL, MVT::f32, R); + return DAG.getSelect(SL, MVT::f32, DAG.getSExtOrTrunc(S, SL, SetCCVT), RNeg, R); +} + SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const { SDLoc SL(Op); @@ -2184,35 +2339,29 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { - SDValue S0 = Op.getOperand(0); - if (S0.getValueType() != MVT::i64) - return SDValue(); + assert(Op.getOperand(0).getValueType() == MVT::i64 && + "operation should be legal"); EVT DestVT = Op.getValueType(); if (DestVT == MVT::f64) return LowerINT_TO_FP64(Op, DAG, false); - assert(DestVT == MVT::f32); - - SDLoc DL(Op); + if (DestVT == MVT::f32) + return LowerINT_TO_FP32(Op, DAG, false); - // f32 uint_to_fp i64 - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, - DAG.getConstant(0, DL, MVT::i32)); - SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, - DAG.getConstant(1, DL, MVT::i32)); - SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi); - // TODO: Should this propagate fast-math-flags? 
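The cul2f() pseudocode in the comment above transcribes directly into host code for sanity-checking the rounding behavior (zero is handled up front because clz(0) and a shift by 64 are undefined in C++):

    #include <cstdint>
    #include <cstring>

    float cul2f(uint64_t u) {
      if (u == 0)
        return 0.0f;                                  // the e = 0 case
      unsigned lz = __builtin_clzll(u);
      uint32_t e = 127u + 63u - lz;                   // biased exponent
      uint64_t m = (u << lz) & 0x7fffffffffffffffULL; // drop the implicit bit
      uint64_t t = m & 0xffffffffffULL;               // 40-bit rounding tail
      uint32_t v = (e << 23) | (uint32_t)(m >> 40);
      uint32_t r = t > 0x8000000000ULL
                       ? 1u
                       : (t == 0x8000000000ULL ? (v & 1u) : 0u);
      v += r; // round to nearest even; a mantissa carry bumps the exponent
      float f;
      std::memcpy(&f, &v, sizeof f);
      return f;
    }

The low 40 bits that do not fit in the 23-bit mantissa drive the round-to-nearest-even adjustment, matching the r computation in the comment.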
- FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi, - DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32 - return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); + return SDValue(); } SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { - SDValue Src = Op.getOperand(0); - if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64) + assert(Op.getOperand(0).getValueType() == MVT::i64 && + "operation should be legal"); + + EVT DestVT = Op.getValueType(); + if (DestVT == MVT::f32) + return LowerINT_TO_FP32(Op, DAG, true); + + if (DestVT == MVT::f64) return LowerINT_TO_FP64(Op, DAG, true); return SDValue(); @@ -2447,6 +2596,97 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, return DAG.getSExtOrTrunc(Mul, DL, VT); } +static bool isNegativeOne(SDValue Val) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) + return C->isAllOnesValue(); + return false; +} + +static bool isCtlzOpc(unsigned Opc) { + return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF; +} + +// Get FFBH node if the incoming op may have been type legalized from a smaller +// type VT. +// Need to match pre-legalized type because the generic legalization inserts the +// add/sub between the select and compare. +static SDValue getFFBH_U32(const TargetLowering &TLI, + SelectionDAG &DAG, SDLoc SL, SDValue Op) { + EVT VT = Op.getValueType(); + EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + if (LegalVT != MVT::i32) + return SDValue(); + + if (VT != MVT::i32) + Op = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Op); + + SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Op); + if (VT != MVT::i32) + FFBH = DAG.getNode(ISD::TRUNCATE, SL, VT, FFBH); + + return FFBH; +} + +// The native instructions return -1 on 0 input. Optimize out a select that +// produces -1 on 0. +// +// TODO: If zero is not undef, we could also do this if the output is compared +// against the bitwidth. +// +// TODO: Should probably combine against FFBH_U32 instead of ctlz directly. 
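performCtlzCombine, defined next, exploits the fact that the hardware count-leading-zeros already yields -1 (all ones) for a zero input, making the guarding select redundant. The scalar picture:

    #include <cstdint>

    // The generic node's contract: undefined on zero input.
    uint32_t ctlz_zero_undef(uint32_t x) { return (uint32_t)__builtin_clz(x); }

    // The AMDGPU instruction's semantics: -1 on zero input.
    uint32_t ffbh_u32(uint32_t x) {
      return x == 0 ? UINT32_MAX : ctlz_zero_undef(x);
    }

    // Pattern before the combine: select (setcc x, 0, eq), -1,
    // (ctlz_zero_undef x). It matches ffbh_u32 exactly, so the select and
    // compare fold away into the single instruction.
    uint32_t guarded(uint32_t x) {
      return x == 0 ? UINT32_MAX : ctlz_zero_undef(x);
    }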
+SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL, + SDValue Cond, + SDValue LHS, + SDValue RHS, + DAGCombinerInfo &DCI) const { + ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); + if (!CmpRhs || !CmpRhs->isNullValue()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + SDValue CmpLHS = Cond.getOperand(0); + + // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x + if (CCOpcode == ISD::SETEQ && + isCtlzOpc(RHS.getOpcode()) && + RHS.getOperand(0) == CmpLHS && + isNegativeOne(LHS)) { + return getFFBH_U32(*this, DAG, SL, CmpLHS); + } + + // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x + if (CCOpcode == ISD::SETNE && + isCtlzOpc(LHS.getOpcode()) && + LHS.getOperand(0) == CmpLHS && + isNegativeOne(RHS)) { + return getFFBH_U32(*this, DAG, SL, CmpLHS); + } + + return SDValue(); +} + +SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SDValue Cond = N->getOperand(0); + if (Cond.getOpcode() != ISD::SETCC) + return SDValue(); + + EVT VT = N->getValueType(0); + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + SDValue CC = Cond.getOperand(2); + + SDValue True = N->getOperand(1); + SDValue False = N->getOperand(2); + + if (VT == MVT::f32 && Cond.hasOneUse()) + return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI); + + // There's no reason to not do this if the condition has other uses. + return performCtlzCombine(SDLoc(N), Cond, True, False, DCI); +} + SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -2471,23 +2711,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, simplifyI24(N1, DCI); return SDValue(); } - case ISD::SELECT: { - SDValue Cond = N->getOperand(0); - if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) { - EVT VT = N->getValueType(0); - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - SDValue CC = Cond.getOperand(2); - - SDValue True = N->getOperand(1); - SDValue False = N->getOperand(2); - - if (VT == MVT::f32) - return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI); - } - - break; - } + case ISD::SELECT: + return performSelectCombine(N, DCI); case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { assert(!N->getValueType(0).isVector() && @@ -2699,6 +2924,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) + NODE_NAME_CASE(FFBH_U32) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MAD_U24) diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 7314cc0..3792541 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -54,6 +54,9 @@ private: SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const; SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; @@ -67,6 +70,9 @@ private: SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue 
performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS, + DAGCombinerInfo &DCI) const; + SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const; protected: static EVT getEquivalentMemType(LLVMContext &Context, EVT VT); @@ -109,6 +115,8 @@ protected: SmallVectorImpl<ISD::InputArg> &OrigIns) const; void AnalyzeFormalArguments(CCState &State, const SmallVectorImpl<ISD::InputArg> &Ins) const; + void AnalyzeReturn(CCState &State, + const SmallVectorImpl<ISD::OutputArg> &Outs) const; public: AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI); @@ -263,6 +271,7 @@ enum NodeType : unsigned { BFE_I32, // Extract range of bits with sign extension to 32-bits. BFI, // (src0 & src1) | (~src0 & src2) BFM, // Insert a range of bits into a 32-bit word. + FFBH_U32, // ctlz with -1 if input is zero. MUL_U24, MUL_I24, MAD_U24, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 70e589c..575dfe4 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -191,6 +191,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; +def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>; + // Signed and unsigned 24-bit multiply. The highest 8-bits are ignored when // performing the multiply. The result is a 32-bit value. def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, @@ -240,4 +242,4 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai // Call/Return DAG Nodes //===----------------------------------------------------------------------===// def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 22f85b3..b1be619 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -66,8 +66,12 @@ static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { } static MachineSchedRegistry -SchedCustomRegistry("r600", "Run R600's custom scheduler", - createR600MachineScheduler); +R600SchedRegistry("r600", "Run R600's custom scheduler", + createR600MachineScheduler); + +static MachineSchedRegistry +SISchedRegistry("si", "Run SI's custom scheduler", + createSIMachineScheduler); static std::string computeDataLayout(const Triple &TT) { std::string Ret = "e-p:32:32"; diff --git a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index 779a14e..2245f14 100644 --- a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -349,7 +349,7 @@ def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>; def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>; def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>; -def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>; +def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", AMDGPUffbh_u32, VecALU>; def FFBL_INT : R600_1OP_Helper <0xAC,
"FFBL_INT", cttz_zero_undef, VecALU>; let hasSideEffects = 1 in { diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 68b1d1a..4bc80a0 100644 --- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -28,7 +28,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { //===--- Global Variable Emission Directives --------------------------===// HasAggressiveSymbolFolding = true; COMMDirectiveAlignmentIsInBytes = false; - HasDotTypeDotSizeDirective = false; HasNoDeadStrip = true; WeakRefDirective = ".weakref\t"; //===--- Dwarf Emission Directives -----------------------------------===// diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h index 7f79dd3..aa1e352 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h @@ -137,7 +137,7 @@ namespace SIOutMods { #define C_00B84C_EXCP_EN #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC - +#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0 #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 #define S_00B848_VGPRS(x) (((x) & 0x3F) << 0) diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 96e37c5..f59d994 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -215,7 +215,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) { unsigned SrcReg = MI.getOperand(I).getReg(); - unsigned SrcSubReg = MI.getOperand(I).getReg(); + unsigned SrcSubReg = MI.getOperand(I).getSubReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); assert(TRI->isSGPRClass(SrcRC) && diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 02a3930..6230d1e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -334,12 +334,20 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { !MRI.hasOneUse(MI.getOperand(0).getReg())) continue; - // FIXME: Fold operands with subregs. if (OpToFold.isReg() && - (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) || - OpToFold.getSubReg())) + !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg())) continue; + // Prevent folding operands backwards in the function. For example, + // the COPY opcode must not be replaced by 1 in this example: + // + // %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3 + // ... 
+ // %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use> + MachineOperand &Dst = MI.getOperand(0); + if (Dst.isReg() && + !TargetRegisterInfo::isVirtualRegister(Dst.getReg())) + continue; // We need to mutate the operands of new mov instructions to add implicit // uses of EXEC, but adding them invalidates the use_iterator, so defer diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0e043cb..5448675 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -259,7 +259,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setTargetDAGCombine(ISD::SMAX); setTargetDAGCombine(ISD::UMIN); setTargetDAGCombine(ISD::UMAX); - setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); @@ -598,18 +597,20 @@ SDValue SITargetLowering::LowerFormalArguments( // First check if it's a PS input addr if (Info->getShaderType() == ShaderType::PIXEL && !Arg.Flags.isInReg() && - !Arg.Flags.isByVal()) { + !Arg.Flags.isByVal() && PSInputNum <= 15) { - assert((PSInputNum <= 15) && "Too many PS inputs!"); - - if (!Arg.Used) { + if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) { // We can safely skip PS inputs Skipped.set(i); ++PSInputNum; continue; } - Info->PSInputAddr |= 1 << PSInputNum++; + Info->markPSInputAllocated(PSInputNum); + if (Arg.Used) + Info->PSInputEna |= 1 << PSInputNum; + + ++PSInputNum; } // Second split vertices into their elements @@ -639,11 +640,25 @@ SDValue SITargetLowering::LowerFormalArguments( *DAG.getContext()); // At least one interpolation mode must be enabled or else the GPU will hang. + // + // Check PSInputAddr instead of PSInputEna. The idea is that if the user set + // PSInputAddr, the user wants to enable some bits after the compilation + // based on run-time states. Since we can't know what the final PSInputEna + // will look like, we shouldn't do anything here and the user should take + // responsibility for the correct programming. + // + // Otherwise, the following restrictions apply: + // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled. + // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be + // enabled too. if (Info->getShaderType() == ShaderType::PIXEL && - (Info->PSInputAddr & 0x7F) == 0) { - Info->PSInputAddr |= 1; + ((Info->getPSInputAddr() & 0x7F) == 0 || + ((Info->getPSInputAddr() & 0xF) == 0 && + Info->isPSInputAllocated(11)))) { CCInfo.AllocateReg(AMDGPU::VGPR0); CCInfo.AllocateReg(AMDGPU::VGPR1); + Info->markPSInputAllocated(0); + Info->PSInputEna |= 1; } if (Info->getShaderType() == ShaderType::COMPUTE) { @@ -872,6 +887,97 @@ SDValue SITargetLowering::LowerFormalArguments( return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } +SDValue SITargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc DL, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + + if (Info->getShaderType() == ShaderType::COMPUTE) + return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs, + OutVals, DL, DAG); + + Info->setIfReturnsVoid(Outs.size() == 0); + + SmallVector<ISD::OutputArg, 48> Splits; + SmallVector<SDValue, 48> SplitVals; + + // Split vectors into their elements.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + const ISD::OutputArg &Out = Outs[i]; + + if (Out.VT.isVector()) { + MVT VT = Out.VT.getVectorElementType(); + ISD::OutputArg NewOut = Out; + NewOut.Flags.setSplit(); + NewOut.VT = VT; + + // We want the original number of vector elements here, e.g. + // three or five, not four or eight. + unsigned NumElements = Out.ArgVT.getVectorNumElements(); + + for (unsigned j = 0; j != NumElements; ++j) { + SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, OutVals[i], + DAG.getConstant(j, DL, MVT::i32)); + SplitVals.push_back(Elem); + Splits.push_back(NewOut); + NewOut.PartOffset += NewOut.VT.getStoreSize(); + } + } else { + SplitVals.push_back(OutVals[i]); + Splits.push_back(Out); + } + } + + // CCValAssign - represent the assignment of the return value to a location. + SmallVector<CCValAssign, 48> RVLocs; + + // CCState - Info about the registers and stack slots. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + // Analyze outgoing return values. + AnalyzeReturn(CCInfo, Splits); + + SDValue Flag; + SmallVector<SDValue, 48> RetOps; + RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + + // Copy the result values into the output registers. + for (unsigned i = 0, realRVLocIdx = 0; + i != RVLocs.size(); + ++i, ++realRVLocIdx) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + SDValue Arg = SplitVals[realRVLocIdx]; + + // Copied from other backends. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + break; + } + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + // Update chain and glue. 
+ RetOps[0] = Chain; + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, RetOps); +} + MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { @@ -1158,6 +1264,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, switch (IntrinsicID) { case Intrinsic::amdgcn_dispatch_ptr: + if (!Subtarget->isAmdHsaOS()) { + DiagnosticInfoUnsupported BadIntrin(*MF.getFunction(), + "hsa intrinsic without hsa target"); + DAG.getContext()->diagnose(BadIntrin); + return DAG.getUNDEF(VT); + } + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT); @@ -2027,7 +2140,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, case ISD::UINT_TO_FP: { return performUCharToFloatCombine(N, DCI); - + } case ISD::FADD: { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) break; @@ -2109,7 +2222,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, break; } - } case ISD::LOAD: case ISD::STORE: case ISD::ATOMIC_LOAD: diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h index e2f8cb1..f01b2c0 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -95,6 +95,13 @@ public: SDLoc DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerReturn(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc DL, SelectionDAG &DAG) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, MachineBasicBlock * BB) const override; bool enableAggressiveFMAFusion(EVT VT) const override; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 821aada..94e6147 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -84,6 +84,9 @@ private: bool LastInstWritesM0; + /// \brief Whether the machine function returns void + bool ReturnsVoid; + /// \brief Get increment/decrement amount for this instruction. Counters getHwCounts(MachineInstr &MI); @@ -322,7 +325,9 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, const Counters &Required) { // End of program? No need to wait on anything - if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM) + // A function not returning void needs to wait, because other bytecode will + // be appended after it and we don't know what it will be. + if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid) return false; // Figure out if the async instructions execute in order @@ -465,6 +470,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { LastIssued = ZeroCounts; LastOpcodeType = OTHER; LastInstWritesM0 = false; + ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid(); memset(&UsedRegs, 0, sizeof(UsedRegs)); memset(&DefinedRegs, 0, sizeof(DefinedRegs)); @@ -488,6 +494,14 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { // Wait for everything at the end of the MBB Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued); + + // Functions returning something shouldn't contain S_ENDPGM, because other + // bytecode will be appended after it. 
+ if (!ReturnsVoid) { + MachineBasicBlock::iterator I = MBB.getFirstTerminator(); + if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM) + I->eraseFromParent(); + } } return Changes; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index a08a5a8..1e10d25 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1777,6 +1777,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, MRI.getRegClass(Reg) : RI.getPhysRegClass(Reg); + const SIRegisterInfo *TRI = + static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo()); + RC = TRI->getSubRegClass(RC, MO.getSubReg()); + // In order to be legal, the common sub-class must be equal to the // class of the current operand. For example: // @@ -3075,3 +3079,15 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const { return Rsrc23; } + +bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + + return isSMRD(Opc); +} + +bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + + return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc); +} diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 307ef67..cce1ae7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -462,6 +462,9 @@ public: uint64_t getDefaultRsrcDataFormat() const; uint64_t getScratchRsrcWords23() const; + + bool isLowLatencyInstruction(const MachineInstr *MI) const; + bool isHighLatencyInstruction(const MachineInstr *MI) const; }; namespace AMDGPU { diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index b7df058..89692ab 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -144,7 +144,7 @@ defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32", defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>; defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32", - [(set i32:$dst, (ctlz_zero_undef i32:$src0))] + [(set i32:$dst, (AMDGPUffbh_u32 i32:$src0))] >; defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index bf15516..49677fc 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -46,8 +46,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), - LDSWaveSpillSize(0), PSInputAddr(0), + ReturnsVoid(true), + LDSWaveSpillSize(0), + PSInputEna(0), NumUserSGPRs(0), NumSystemSGPRs(0), HasSpilledSGPRs(false), @@ -72,6 +74,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); const Function *F = MF.getFunction(); + PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); + const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); if (getShaderType() == ShaderType::COMPUTE) diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 
9c528d6..846ee5d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -57,10 +57,14 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
   unsigned WorkGroupInfoSystemSGPR;
   unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
 
+  // Graphics info.
+  unsigned PSInputAddr;
+  bool ReturnsVoid;
+
 public:
   // FIXME: Make private
   unsigned LDSWaveSpillSize;
-  unsigned PSInputAddr;
+  unsigned PSInputEna;
   std::map<unsigned, unsigned> LaneVGPRs;
   unsigned ScratchOffsetReg;
   unsigned NumUserSGPRs;
@@ -273,6 +277,26 @@ public:
     HasSpilledVGPRs = Spill;
   }
 
+  unsigned getPSInputAddr() const {
+    return PSInputAddr;
+  }
+
+  bool isPSInputAllocated(unsigned Index) const {
+    return PSInputAddr & (1 << Index);
+  }
+
+  void markPSInputAllocated(unsigned Index) {
+    PSInputAddr |= 1 << Index;
+  }
+
+  bool returnsVoid() const {
+    return ReturnsVoid;
+  }
+
+  void setIfReturnsVoid(bool Value) {
+    ReturnsVoid = Value;
+  }
+
   unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
 };
 
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
new file mode 100644
index 0000000..1cfa984
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -0,0 +1,1968 @@
+//===-- SIMachineScheduler.cpp - SI Scheduler Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIMachineScheduler.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+// This scheduler implements a different scheduling algorithm than
+// GenericScheduler.
+//
+// There are several specific architecture behaviours that can't be modelled
+// for GenericScheduler:
+// . When accessing the result of an SGPR load instruction, you have to wait
+//   for all the SGPR load instructions before your current instruction to
+//   have finished.
+// . When accessing the result of a VGPR load instruction, you have to wait
+//   for all the VGPR load instructions previous to the VGPR load instruction
+//   you are interested in to finish.
+// . The lower the register pressure, the better load latencies are hidden.
+//
+// Moreover some specificities (like the fact that a lot of instructions in
+// the shader have few dependencies) make the generic scheduler have some
+// unpredictable behaviours. For example when register pressure becomes high,
+// it can either manage to prevent register pressure from going too high, or
+// it can increase register pressure even more than if it hadn't taken
+// register pressure into account.
+//
+// Also some other bad behaviours are generated, like loading at the
+// beginning of the shader a constant into a VGPR that you won't need until
+// the end of the shader.
+//
+// The scheduling problem for SI can distinguish three main parts:
+// . Hiding high latencies (texture sampling, etc)
+// . Hiding low latencies (SGPR constant loading, etc)
+// . 
Keeping register usage low for better latency hiding and general
+//   performance
+//
+// Some other things can also affect performance, but are hard to predict
+// (cache usage, the fact the HW can issue several instructions from
+// different wavefronts of different types, etc)
+//
+// This scheduler tries to solve the scheduling problem by dividing it into
+// simpler sub-problems. It divides the instructions into blocks, schedules
+// locally inside the blocks where it takes care of low latencies, and then
+// chooses the order of the blocks by taking care of high latencies.
+// Dividing the instructions into blocks helps keep register usage under
+// control.
+//
+// First the instructions are put into blocks.
+// We want the blocks to help control register usage and hide high latencies
+// later. To help control register usage, we typically want all local
+// computations, when for example you create a result that can be consumed
+// right away, to be contained in a block. Block inputs and outputs would
+// typically be important results that are needed in several locations of
+// the shader. Since we do want blocks to help hide high latencies, we want
+// the instructions inside the block to have a minimal set of dependencies
+// on high latencies. This makes it easy to pick blocks to hide specific
+// high latencies.
+// The block creation algorithm is divided into several steps, and several
+// variants can be tried during the scheduling process.
+//
+// Second the order of the instructions inside the blocks is chosen.
+// At that step we take into account only register usage and the hiding of
+// low latency instructions.
+//
+// Third the block order is chosen; there we try to hide high latencies
+// and keep register usage low.
+//
+// After the third step, a pass is done to improve the hiding of low
+// latencies.
+//
+// Actually when talking about 'low latency' or 'high latency' it includes
+// both the latency for the cache (or global mem) data to reach the
+// register, and the bandwidth limitations.
+// Increasing the number of active wavefronts helps hide the former, but it
+// doesn't solve the latter, which is why, even if the wavefront count is
+// high, we have to try to have as many instructions hiding high latencies
+// as possible.
+// The OpenCL doc gives, for example, a latency of 400 cycles for a global
+// mem access, which is hidden by 10 instructions if the wavefront count
+// is 10.
+
+// Some figures taken from AMD docs:
+// Both texture and constant L1 caches are 4-way associative with 64-byte
+// lines.
+// Constant cache is shared with 4 CUs.
+// For texture sampling, the address generation unit receives 4 texture
+// addresses per cycle, thus we could expect texture sampling latency to be
+// equivalent to 4 instructions in the very best case (a VGPR is 64 work
+// items, instructions in a wavefront group are executed every 4 cycles),
+// or 16 instructions if the other wavefronts associated with the 3 other
+// VALUs of the CU do texture sampling too. (Don't take these figures too
+// seriously, as I'm not 100% sure of the computation.)
+// Data exports should get similar latency.
+// For constant loading, the cache is shared with 4 CUs.
+// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
+// I guess if the other CUs don't read the cache, it can go up to 64B/cycle.
+// It means a simple s_buffer_load should take one instruction to hide, as
+// well as an s_buffer_loadx2 and potentially an s_buffer_loadx8 if on the
+// same cache line.
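// A back-of-the-envelope sketch of that latency-hiding arithmetic (toy
// helper, not part of this patch; assumes one instruction issued per
// wavefront every 4 cycles, as in the figures above):
//
//   static unsigned instructionsToHide(unsigned LatencyCycles,
//                                      unsigned WavefrontCount) {
//     const unsigned CyclesPerSlot = 4 * WavefrontCount; // cycles hidden
//                                                        // per instr. slot
//     return (LatencyCycles + CyclesPerSlot - 1) / CyclesPerSlot; // round up
//   }
//
// instructionsToHide(400, 10) == 10, matching the OpenCL figure above; with
// only 2 wavefronts the same load would need 50 covering instructions.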
+//
+// As of today the driver doesn't preload the constants in cache, thus the
+// first loads get extra latency. The doc says global memory access can be
+// 300-600 cycles. We do not specifically take that into account when
+// scheduling, as we expect the driver to be able to preload the constants
+// soon.
+
+
+// common code //
+
+#ifndef NDEBUG
+
+static const char *getReasonStr(SIScheduleCandReason Reason) {
+  switch (Reason) {
+  case NoCand: return "NOCAND";
+  case RegUsage: return "REGUSAGE";
+  case Latency: return "LATENCY";
+  case Successor: return "SUCCESSOR";
+  case Depth: return "DEPTH";
+  case NodeOrder: return "ORDER";
+  }
+  llvm_unreachable("Unknown reason!");
+}
+
+#endif
+
+static bool tryLess(int TryVal, int CandVal,
+                    SISchedulerCandidate &TryCand,
+                    SISchedulerCandidate &Cand,
+                    SIScheduleCandReason Reason) {
+  if (TryVal < CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal > CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  Cand.setRepeat(Reason);
+  return false;
+}
+
+static bool tryGreater(int TryVal, int CandVal,
+                       SISchedulerCandidate &TryCand,
+                       SISchedulerCandidate &Cand,
+                       SIScheduleCandReason Reason) {
+  if (TryVal > CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal < CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  Cand.setRepeat(Reason);
+  return false;
+}
+
+// SIScheduleBlock //
+
+void SIScheduleBlock::addUnit(SUnit *SU) {
+  NodeNum2Index[SU->NodeNum] = SUnits.size();
+  SUnits.push_back(SU);
+}
+
+#ifndef NDEBUG
+
+void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
+  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+  dbgs() << '\n';
+}
+#endif
+
+void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
+                                          SISchedCandidate &TryCand) {
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return;
+  }
+
+  if (Cand.SGPRUsage > 60 &&
+      tryLess(TryCand.SGPRUsage, Cand.SGPRUsage, TryCand, Cand, RegUsage))
+    return;
+
+  // Schedule low latency instructions as close to the top as possible.
+  // Order of priority is:
+  // . Low latency instructions which do not depend on other low latency
+  //   instructions we haven't waited for
+  // . Other instructions which do not depend on low latency instructions
+  //   we haven't waited for
+  // . Low latencies
+  // . All other instructions
+  // Goal is to get: low latency instructions - independent instructions
+  // - (possibly some more low latency instructions) - instructions that
+  // depend on the first low latency instructions.
+  // If the block contains a lot of constant loads, the SGPR usage can go
+  // quite high; going above the arbitrary limit of 60 encourages reusing
+  // the already loaded constants (in order to release some SGPRs) before
+  // loading more.
+  if (tryLess(TryCand.HasLowLatencyNonWaitedParent,
+              Cand.HasLowLatencyNonWaitedParent,
+              TryCand, Cand, SIScheduleCandReason::Depth))
+    return;
+
+  if (tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
+                 TryCand, Cand, SIScheduleCandReason::Depth))
+    return;
+
+  if (TryCand.IsLowLatency &&
+      tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
+              TryCand, Cand, SIScheduleCandReason::Depth))
+    return;
+
+  if (tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand, RegUsage))
+    return;
+
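+  // A standalone sketch of how such a tryLess/tryGreater cascade composes
+  // (toy types and names, not from this patch). Each rule either decides
+  // (it sets TryCand.Reason, so the caller adopts TryCand) or falls through
+  // to the next, weaker criterion, ending with source order:
+  //
+  //   enum ToyReason { ToyNoCand, ToyRegUsage, ToyOrder };
+  //   struct ToyCand { ToyReason Reason = ToyNoCand; unsigned SGPR = 0;
+  //                    bool LowLat = false; unsigned Node = 0; };
+  //   static bool toyLess(int Try, int Cur, ToyCand &T, ToyCand &C,
+  //                       ToyReason R) {
+  //     if (Try < Cur) { T.Reason = R; return true; }    // strictly better
+  //     if (Try > Cur) { if (C.Reason > R) C.Reason = R; return true; }
+  //     return false;                                    // tie: fall through
+  //   }
+  //   static void toyPick(ToyCand &Cand, ToyCand &Try) {
+  //     if (Cand.Reason == ToyNoCand) { Try.Reason = ToyOrder; return; }
+  //     if (toyLess(Try.SGPR, Cand.SGPR, Try, Cand, ToyRegUsage)) return;
+  //     if (Try.LowLat && !Cand.LowLat) { Try.Reason = ToyOrder; return; }
+  //     if (Try.Node < Cand.Node) Try.Reason = ToyOrder; // source order last
+  //   }
+  //   // Caller adopts Try whenever Try.Reason != ToyNoCand after toyPick.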
+  // Fall through to original instruction order.
+  if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
+    TryCand.Reason = NodeOrder;
+  }
+}
+
+SUnit* SIScheduleBlock::pickNode() {
+  SISchedCandidate TopCand;
+
+  for (SUnit* SU : TopReadySUs) {
+    SISchedCandidate TryCand;
+    std::vector<unsigned> pressure;
+    std::vector<unsigned> MaxPressure;
+    // Predict register usage after this instruction.
+    TryCand.SU = SU;
+    TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
+    TryCand.SGPRUsage = pressure[DAG->getSGPRSetID()];
+    TryCand.VGPRUsage = pressure[DAG->getVGPRSetID()];
+    TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
+    TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
+    TryCand.HasLowLatencyNonWaitedParent =
+      HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
+    tryCandidateTopDown(TopCand, TryCand);
+    if (TryCand.Reason != NoCand)
+      TopCand.setBest(TryCand);
+  }
+
+  return TopCand.SU;
+}
+
+
+// Schedule something valid.
+void SIScheduleBlock::fastSchedule() {
+  TopReadySUs.clear();
+  if (Scheduled)
+    undoSchedule();
+
+  for (SUnit* SU : SUnits) {
+    if (!SU->NumPredsLeft)
+      TopReadySUs.push_back(SU);
+  }
+
+  while (!TopReadySUs.empty()) {
+    SUnit *SU = TopReadySUs[0];
+    ScheduledSUnits.push_back(SU);
+    nodeScheduled(SU);
+  }
+
+  Scheduled = true;
+}
+
+// Returns true if the register was defined between First and Last.
+static bool isDefBetween(unsigned Reg,
+                         SlotIndex First, SlotIndex Last,
+                         const MachineRegisterInfo *MRI,
+                         const LiveIntervals *LIS) {
+  for (MachineRegisterInfo::def_instr_iterator
+       UI = MRI->def_instr_begin(Reg),
+       UE = MRI->def_instr_end(); UI != UE; ++UI) {
+    const MachineInstr* MI = &*UI;
+    if (MI->isDebugValue())
+      continue;
+    SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
+    if (InstSlot >= First && InstSlot <= Last)
+      return true;
+  }
+  return false;
+}
+
+void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
+                                      MachineBasicBlock::iterator EndBlock) {
+  IntervalPressure Pressure, BotPressure;
+  RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
+  LiveIntervals *LIS = DAG->getLIS();
+  MachineRegisterInfo *MRI = DAG->getMRI();
+  DAG->initRPTracker(TopRPTracker);
+  DAG->initRPTracker(BotRPTracker);
+  DAG->initRPTracker(RPTracker);
+
+  // Goes through all SUs. RPTracker captures what had to be alive for the
+  // SUs to execute, and what is still alive at the end.
+  for (SUnit* SU : ScheduledSUnits) {
+    RPTracker.setPos(SU->getInstr());
+    RPTracker.advance();
+  }
+
+  // Close the RPTracker to finalize live ins/outs.
+  RPTracker.closeRegion();
+
+  // Initialize the live ins and live outs.
+  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+  // Do not track physical registers, because it messes up the tracking.
+  for (unsigned Reg : RPTracker.getPressure().LiveInRegs) {
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      LiveInRegs.insert(Reg);
+  }
+  LiveOutRegs.clear();
+  // There are several possibilities to distinguish:
+  // 1) Reg is not input to any instruction in the block, but is output of one
+  // 2) 1) + read in the block and not needed after it
+  // 3) 1) + read in the block but needed in another block
+  // 4) Reg is input of an instruction but another block will read it too
+  // 5) Reg is input of an instruction and then rewritten in the block.
+  //    result is not read in the block (implies used in another block)
+  // 6) Reg is input of an instruction and then rewritten in the block.
+  //    result is read in the block and not needed in another block
+  // 7) Reg is input of an instruction and then rewritten in the block.
+  //    result is read in the block but also needed in another block
+  // LiveInRegs will contain all the regs in situations 4, 5, 6, 7.
+  // We want LiveOutRegs to contain only Regs whose content will be read
+  // after in another block, and whose content was written in the current
+  // block, that is, we want it to contain 1, 3, 5, 7.
+  // Since we packed all the MIs of a block together before scheduling, the
+  // LiveIntervals were correct, and the RPTracker was able to correctly
+  // handle 5 vs 6 and 2 vs 3.
+  // (Note: this is not sufficient to keep the RPTracker from making
+  // mistakes for case 4.)
+  // The RPTracker's LiveOutRegs has 1, 3, (some correct and some incorrect)
+  // 4, 5, 7.
+  // Comparing to LiveInRegs is not sufficient to differentiate 4 from 5
+  // and 7.
+  // The use of isDefBetween removes case 4.
+  for (unsigned Reg : RPTracker.getPressure().LiveOutRegs) {
+    if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+        isDefBetween(Reg, LIS->getInstructionIndex(BeginBlock).getRegSlot(),
+                     LIS->getInstructionIndex(EndBlock).getRegSlot(),
+                     MRI, LIS)) {
+      LiveOutRegs.insert(Reg);
+    }
+  }
+
+  // Pressure = sum_alive_registers register size
+  // Internally LLVM will represent some registers as big 128-bit registers,
+  // for example, but they actually correspond to 4 actual 32-bit registers.
+  // Thus Pressure is not equal to num_alive_registers * constant.
+  LiveInPressure = TopPressure.MaxSetPressure;
+  LiveOutPressure = BotPressure.MaxSetPressure;
+
+  // Prepares TopRPTracker for top down scheduling.
+  TopRPTracker.closeTop();
+}
+
+void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
+                               MachineBasicBlock::iterator EndBlock) {
+  if (!Scheduled)
+    fastSchedule();
+
+  // PreScheduling phase to set LiveIn and LiveOut.
+  initRegPressure(BeginBlock, EndBlock);
+  undoSchedule();
+
+  // Schedule for real now.
+
+  TopReadySUs.clear();
+
+  for (SUnit* SU : SUnits) {
+    if (!SU->NumPredsLeft)
+      TopReadySUs.push_back(SU);
+  }
+
+  while (!TopReadySUs.empty()) {
+    SUnit *SU = pickNode();
+    ScheduledSUnits.push_back(SU);
+    TopRPTracker.setPos(SU->getInstr());
+    TopRPTracker.advance();
+    nodeScheduled(SU);
+  }
+
+  // TODO: compute InternalAdditionnalPressure.
+  InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
+
+  // Check everything is right.
+#ifndef NDEBUG
+  assert(SUnits.size() == ScheduledSUnits.size() &&
+         TopReadySUs.empty());
+  for (SUnit* SU : SUnits) {
+    assert(SU->isScheduled &&
+           SU->NumPredsLeft == 0);
+  }
+#endif
+
+  Scheduled = true;
+}
+
+void SIScheduleBlock::undoSchedule() {
+  for (SUnit* SU : SUnits) {
+    SU->isScheduled = false;
+    for (SDep& Succ : SU->Succs) {
+      if (BC->isSUInBlock(Succ.getSUnit(), ID))
+        undoReleaseSucc(SU, &Succ);
+    }
+  }
+  HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
+  ScheduledSUnits.clear();
+  Scheduled = false;
+}
+
+void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+  SUnit *SuccSU = SuccEdge->getSUnit();
+
+  if (SuccEdge->isWeak()) {
+    ++SuccSU->WeakPredsLeft;
+    return;
+  }
+  ++SuccSU->NumPredsLeft;
+}
+
+void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+  SUnit *SuccSU = SuccEdge->getSUnit();
+
+  if (SuccEdge->isWeak()) {
+    --SuccSU->WeakPredsLeft;
+    return;
+  }
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft == 0) {
+    dbgs() << "*** Scheduling failed!
***\n"; + SuccSU->dump(DAG); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(nullptr); + } +#endif + + --SuccSU->NumPredsLeft; +} + +/// Release Successors of the SU that are in the block or not. +void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) { + for (SDep& Succ : SU->Succs) { + SUnit *SuccSU = Succ.getSUnit(); + + if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock) + continue; + + releaseSucc(SU, &Succ); + if (SuccSU->NumPredsLeft == 0 && InOrOutBlock) + TopReadySUs.push_back(SuccSU); + } +} + +void SIScheduleBlock::nodeScheduled(SUnit *SU) { + // Is in TopReadySUs + assert (!SU->NumPredsLeft); + std::vector<SUnit*>::iterator I = + std::find(TopReadySUs.begin(), TopReadySUs.end(), SU); + if (I == TopReadySUs.end()) { + dbgs() << "Data Structure Bug in SI Scheduler\n"; + llvm_unreachable(nullptr); + } + TopReadySUs.erase(I); + + releaseSuccessors(SU, true); + // Scheduling this node will trigger a wait, + // thus propagate to other instructions that they do not need to wait either. + if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]]) + HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0); + + if (DAG->IsLowLatencySU[SU->NodeNum]) { + for (SDep& Succ : SU->Succs) { + std::map<unsigned, unsigned>::iterator I = + NodeNum2Index.find(Succ.getSUnit()->NodeNum); + if (I != NodeNum2Index.end()) + HasLowLatencyNonWaitedParent[I->second] = 1; + } + } + SU->isScheduled = true; +} + +void SIScheduleBlock::finalizeUnits() { + // We remove links from outside blocks to enable scheduling inside the block. + for (SUnit* SU : SUnits) { + releaseSuccessors(SU, false); + if (DAG->IsHighLatencySU[SU->NodeNum]) + HighLatencyBlock = true; + } + HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0); +} + +// we maintain ascending order of IDs +void SIScheduleBlock::addPred(SIScheduleBlock *Pred) { + unsigned PredID = Pred->getID(); + + // Check if not already predecessor. + for (SIScheduleBlock* P : Preds) { + if (PredID == P->getID()) + return; + } + Preds.push_back(Pred); + +#ifndef NDEBUG + for (SIScheduleBlock* S : Succs) { + if (PredID == S->getID()) + assert(!"Loop in the Block Graph!\n"); + } +#endif +} + +void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) { + unsigned SuccID = Succ->getID(); + + // Check if not already predecessor. 
+  for (SIScheduleBlock* S : Succs) {
+    if (SuccID == S->getID())
+      return;
+  }
+  if (Succ->isHighLatencyBlock())
+    ++NumHighLatencySuccessors;
+  Succs.push_back(Succ);
+#ifndef NDEBUG
+  for (SIScheduleBlock* P : Preds) {
+    if (SuccID == P->getID())
+      assert(!"Loop in the Block Graph!\n");
+  }
+#endif
+}
+
+#ifndef NDEBUG
+void SIScheduleBlock::printDebug(bool full) {
+  dbgs() << "Block (" << ID << ")\n";
+  if (!full)
+    return;
+
+  dbgs() << "\nContains High Latency Instruction: "
+         << HighLatencyBlock << '\n';
+  dbgs() << "\nDepends On:\n";
+  for (SIScheduleBlock* P : Preds) {
+    P->printDebug(false);
+  }
+
+  dbgs() << "\nSuccessors:\n";
+  for (SIScheduleBlock* S : Succs) {
+    S->printDebug(false);
+  }
+
+  if (Scheduled) {
+    dbgs() << "LiveInPressure " << LiveInPressure[DAG->getSGPRSetID()] << ' '
+           << LiveInPressure[DAG->getVGPRSetID()] << '\n';
+    dbgs() << "LiveOutPressure " << LiveOutPressure[DAG->getSGPRSetID()] << ' '
+           << LiveOutPressure[DAG->getVGPRSetID()] << "\n\n";
+    dbgs() << "LiveIns:\n";
+    for (unsigned Reg : LiveInRegs)
+      dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+
+    dbgs() << "\nLiveOuts:\n";
+    for (unsigned Reg : LiveOutRegs)
+      dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+  }
+
+  // The dump is the same whether or not the block has been scheduled.
+  dbgs() << "\nInstructions:\n";
+  for (SUnit* SU : SUnits) {
+    SU->dump(DAG);
+  }
+
+  dbgs() << "///////////////////////\n";
+}
+
+#endif
+
+// SIScheduleBlockCreator //
+
+SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG) :
+DAG(DAG) {
+}
+
+SIScheduleBlockCreator::~SIScheduleBlockCreator() {
+}
+
+SIScheduleBlocks
+SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
+  std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
+    Blocks.find(BlockVariant);
+  if (B == Blocks.end()) {
+    SIScheduleBlocks Res;
+    createBlocksForVariant(BlockVariant);
+    topologicalSort();
+    scheduleInsideBlocks();
+    fillStats();
+    Res.Blocks = CurrentBlocks;
+    Res.TopDownIndex2Block = TopDownIndex2Block;
+    Res.TopDownBlock2Index = TopDownBlock2Index;
+    Blocks[BlockVariant] = Res;
+    return Res;
+  } else {
+    return B->second;
+  }
+}
+
+bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
+  if (SU->NodeNum >= DAG->SUnits.size())
+    return false;
+  return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
+}
+
+void SIScheduleBlockCreator::colorHighLatenciesAlone() {
+  unsigned DAGSize = DAG->SUnits.size();
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &DAG->SUnits[i];
+    if (DAG->IsHighLatencySU[SU->NodeNum]) {
+      CurrentColoring[SU->NodeNum] = NextReservedID++;
+    }
+  }
+}
+
+void SIScheduleBlockCreator::colorHighLatenciesGroups() {
+  unsigned DAGSize = DAG->SUnits.size();
+  unsigned NumHighLatencies = 0;
+  unsigned GroupSize;
+  unsigned Color = NextReservedID;
+  unsigned Count = 0;
+  std::set<unsigned> FormingGroup;
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &DAG->SUnits[i];
+    if (DAG->IsHighLatencySU[SU->NodeNum])
+      ++NumHighLatencies;
+  }
+
+  if (NumHighLatencies == 0)
+    return;
+
+  if (NumHighLatencies <= 6)
+    GroupSize = 2;
+  else if (NumHighLatencies <= 12)
+    GroupSize = 3;
+  else
+    GroupSize = 4;
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &DAG->SUnits[i];
+    if (DAG->IsHighLatencySU[SU->NodeNum]) {
+      bool CompatibleGroup = true;
+      unsigned ProposedColor = Color;
+      for (unsigned j : FormingGroup) {
+        // TODO: Currently CompatibleGroup will always be false,
+        // because the graph enforces the load order. This can be
+        // fixed, but since keeping the load order is often good
+        // for performance, breaking it causes a performance hit
+        // (with both the default scheduler and this scheduler).
+        // When this scheduler can determine a good load order,
+        // this can be fixed.
+        if (!DAG->canAddEdge(SU, &DAG->SUnits[j]) ||
+            !DAG->canAddEdge(&DAG->SUnits[j], SU))
+          CompatibleGroup = false;
+      }
+      if (!CompatibleGroup || ++Count == GroupSize) {
+        FormingGroup.clear();
+        Color = ++NextReservedID;
+        if (!CompatibleGroup) {
+          ProposedColor = Color;
+          FormingGroup.insert(SU->NodeNum);
+        }
+        Count = 0;
+      } else {
+        FormingGroup.insert(SU->NodeNum);
+      }
+      CurrentColoring[SU->NodeNum] = ProposedColor;
+    }
+  }
+}
+
+void SIScheduleBlockCreator::colorComputeReservedDependencies() {
+  unsigned DAGSize = DAG->SUnits.size();
+  std::map<std::set<unsigned>, unsigned> ColorCombinations;
+
+  CurrentTopDownReservedDependencyColoring.clear();
+  CurrentBottomUpReservedDependencyColoring.clear();
+
+  CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
+  CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
+
+  // Traverse TopDown, and give different colors to SUs depending
+  // on which combination of High Latencies they depend on.
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &DAG->SUnits[DAG->TopDownIndex2SU[i]];
+    std::set<unsigned> SUColors;
+
+    // Already given.
+    if (CurrentColoring[SU->NodeNum]) {
+      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+        CurrentColoring[SU->NodeNum];
+      continue;
+    }
+
+    for (SDep& PredDep : SU->Preds) {
+      SUnit *Pred = PredDep.getSUnit();
+      if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
+        continue;
+      if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
+        SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
+    }
+    // Color 0 by default.
+    if (SUColors.empty())
+      continue;
+    // Same color as parents.
+    if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
+      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+        *SUColors.begin();
+    else {
+      std::map<std::set<unsigned>, unsigned>::iterator Pos =
+        ColorCombinations.find(SUColors);
+      if (Pos != ColorCombinations.end()) {
+        CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
+      } else {
+        CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+          NextNonReservedID;
+        ColorCombinations[SUColors] = NextNonReservedID++;
+      }
+    }
+  }
+
+  ColorCombinations.clear();
+
+  // Same as before, but BottomUp.
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+    std::set<unsigned> SUColors;
+
+    // Already given.
+    if (CurrentColoring[SU->NodeNum]) {
+      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
+        CurrentColoring[SU->NodeNum];
+      continue;
+    }
+
+    for (SDep& SuccDep : SU->Succs) {
+      SUnit *Succ = SuccDep.getSUnit();
+      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+        continue;
+      if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
+        SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
+    }
+    // Keep color 0.
+    if (SUColors.empty())
+      continue;
+    // Same color as parents.
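+    // The ColorCombinations pattern used in this function is plain
+    // set-interning: each distinct set of parent colors gets one fresh id.
+    // A minimal standalone sketch (hypothetical helper, not part of this
+    // patch):
+    //
+    //   #include <map>
+    //   #include <set>
+    //   static unsigned internColorSet(
+    //       std::map<std::set<unsigned>, unsigned> &Combos,
+    //       const std::set<unsigned> &Colors, unsigned &NextID) {
+    //     auto It = Combos.find(Colors);
+    //     if (It != Combos.end())
+    //       return It->second;     // combination already seen: reuse it
+    //     Combos[Colors] = NextID; // new combination: fresh color
+    //     return NextID++;
+    //   }
+    //
+    // Two SUs whose predecessors carry exactly the colors {2, 5} thus land
+    // in the same group, while {2} or {2, 5, 9} start groups of their own.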
+ if (SUColors.size() == 1 && *SUColors.begin() > DAGSize) + CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = + *SUColors.begin(); + else { + std::map<std::set<unsigned>, unsigned>::iterator Pos = + ColorCombinations.find(SUColors); + if (Pos != ColorCombinations.end()) { + CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second; + } else { + CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = + NextNonReservedID; + ColorCombinations[SUColors] = NextNonReservedID++; + } + } + } +} + +void SIScheduleBlockCreator::colorAccordingToReservedDependencies() { + unsigned DAGSize = DAG->SUnits.size(); + std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations; + + // Every combination of colors given by the top down + // and bottom up Reserved node dependency + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[i]; + std::pair<unsigned, unsigned> SUColors; + + // High latency instructions: already given. + if (CurrentColoring[SU->NodeNum]) + continue; + + SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum]; + SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum]; + + std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos = + ColorCombinations.find(SUColors); + if (Pos != ColorCombinations.end()) { + CurrentColoring[SU->NodeNum] = Pos->second; + } else { + CurrentColoring[SU->NodeNum] = NextNonReservedID; + ColorCombinations[SUColors] = NextNonReservedID++; + } + } +} + +void SIScheduleBlockCreator::colorEndsAccordingToDependencies() { + unsigned DAGSize = DAG->SUnits.size(); + std::vector<int> PendingColoring = CurrentColoring; + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + std::set<unsigned> SUColors; + std::set<unsigned> SUColorsPending; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 || + CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 || + CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0) + SUColors.insert(CurrentColoring[Succ->NodeNum]); + SUColorsPending.insert(PendingColoring[Succ->NodeNum]); + } + if (SUColors.size() == 1 && SUColorsPending.size() == 1) + PendingColoring[SU->NodeNum] = *SUColors.begin(); + else // TODO: Attribute new colors depending on color + // combination of children. 
+ PendingColoring[SU->NodeNum] = NextNonReservedID++; + } + CurrentColoring = PendingColoring; +} + + +void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() { + unsigned DAGSize = DAG->SUnits.size(); + unsigned PreviousColor; + std::set<unsigned> SeenColors; + + if (DAGSize <= 1) + return; + + PreviousColor = CurrentColoring[0]; + + for (unsigned i = 1, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[i]; + unsigned CurrentColor = CurrentColoring[i]; + unsigned PreviousColorSave = PreviousColor; + assert(i == SU->NodeNum); + + if (CurrentColor != PreviousColor) + SeenColors.insert(PreviousColor); + PreviousColor = CurrentColor; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + if (SeenColors.find(CurrentColor) == SeenColors.end()) + continue; + + if (PreviousColorSave != CurrentColor) + CurrentColoring[i] = NextNonReservedID++; + else + CurrentColoring[i] = CurrentColoring[i-1]; + } +} + +void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() { + unsigned DAGSize = DAG->SUnits.size(); + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + std::set<unsigned> SUColors; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + // No predecessor: Vgpr constant loading. + // Low latency instructions usually have a predecessor (the address) + if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum]) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + SUColors.insert(CurrentColoring[Succ->NodeNum]); + } + if (SUColors.size() == 1) + CurrentColoring[SU->NodeNum] = *SUColors.begin(); + } +} + +void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() { + unsigned DAGSize = DAG->SUnits.size(); + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + std::set<unsigned> SUColors; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + SUColors.insert(CurrentColoring[Succ->NodeNum]); + } + if (SUColors.size() == 1) + CurrentColoring[SU->NodeNum] = *SUColors.begin(); + } +} + +void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() { + unsigned DAGSize = DAG->SUnits.size(); + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + std::set<unsigned> SUColors; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + SUColors.insert(CurrentColoring[Succ->NodeNum]); + } + if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize) + CurrentColoring[SU->NodeNum] = *SUColors.begin(); + } +} + +void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() { + unsigned DAGSize = DAG->SUnits.size(); + std::map<unsigned, unsigned> ColorCount; + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + unsigned color = CurrentColoring[SU->NodeNum]; + std::map<unsigned, unsigned>::iterator Pos = ColorCount.find(color); + if (Pos != ColorCount.end()) { + ++ColorCount[color]; + } else { + ColorCount[color] = 1; + } + } + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + 
unsigned color = CurrentColoring[SU->NodeNum]; + std::set<unsigned> SUColors; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + if (ColorCount[color] > 1) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + SUColors.insert(CurrentColoring[Succ->NodeNum]); + } + if (SUColors.size() == 1 && *SUColors.begin() != color) { + --ColorCount[color]; + CurrentColoring[SU->NodeNum] = *SUColors.begin(); + ++ColorCount[*SUColors.begin()]; + } + } +} + +void SIScheduleBlockCreator::cutHugeBlocks() { + // TODO +} + +void SIScheduleBlockCreator::regroupNoUserInstructions() { + unsigned DAGSize = DAG->SUnits.size(); + int GroupID = NextNonReservedID++; + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]]; + bool hasSuccessor = false; + + if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) + continue; + + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + hasSuccessor = true; + } + if (!hasSuccessor) + CurrentColoring[SU->NodeNum] = GroupID; + } +} + +void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) { + unsigned DAGSize = DAG->SUnits.size(); + std::map<unsigned,unsigned> RealID; + + CurrentBlocks.clear(); + CurrentColoring.clear(); + CurrentColoring.resize(DAGSize, 0); + Node2CurrentBlock.clear(); + + // Restore links previous scheduling variant has overridden. + DAG->restoreSULinksLeft(); + + NextReservedID = 1; + NextNonReservedID = DAGSize + 1; + + DEBUG(dbgs() << "Coloring the graph\n"); + + if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped) + colorHighLatenciesGroups(); + else + colorHighLatenciesAlone(); + colorComputeReservedDependencies(); + colorAccordingToReservedDependencies(); + colorEndsAccordingToDependencies(); + if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive) + colorForceConsecutiveOrderInGroup(); + regroupNoUserInstructions(); + colorMergeConstantLoadsNextGroup(); + colorMergeIfPossibleNextGroupOnlyForReserved(); + + // Put SUs of same color into same block + Node2CurrentBlock.resize(DAGSize, -1); + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[i]; + unsigned Color = CurrentColoring[SU->NodeNum]; + if (RealID.find(Color) == RealID.end()) { + int ID = CurrentBlocks.size(); + BlockPtrs.push_back( + make_unique<SIScheduleBlock>(DAG, this, ID)); + CurrentBlocks.push_back(BlockPtrs.rbegin()->get()); + RealID[Color] = ID; + } + CurrentBlocks[RealID[Color]]->addUnit(SU); + Node2CurrentBlock[SU->NodeNum] = RealID[Color]; + } + + // Build dependencies between blocks. + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &DAG->SUnits[i]; + int SUID = Node2CurrentBlock[i]; + for (SDep& SuccDep : SU->Succs) { + SUnit *Succ = SuccDep.getSUnit(); + if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) + continue; + if (Node2CurrentBlock[Succ->NodeNum] != SUID) + CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]]); + } + for (SDep& PredDep : SU->Preds) { + SUnit *Pred = PredDep.getSUnit(); + if (PredDep.isWeak() || Pred->NodeNum >= DAGSize) + continue; + if (Node2CurrentBlock[Pred->NodeNum] != SUID) + CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]); + } + } + + // Free root and leafs of all blocks to enable scheduling inside them. 
+ for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + Block->finalizeUnits(); + } + DEBUG( + dbgs() << "Blocks created:\n\n"; + for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + Block->printDebug(true); + } + ); +} + +// Two functions taken from Codegen/MachineScheduler.cpp + +/// If this iterator is a debug value, increment until reaching the End or a +/// non-debug instruction. +static MachineBasicBlock::const_iterator +nextIfDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator End) { + for(; I != End; ++I) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// Non-const version. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator End) { + // Cast the return value to nonconst MachineInstr, then cast to an + // instr_iterator, which does not check for null, finally return a + // bundle_iterator. + return MachineBasicBlock::instr_iterator( + const_cast<MachineInstr*>( + &*nextIfDebug(MachineBasicBlock::const_iterator(I), End))); +} + +void SIScheduleBlockCreator::topologicalSort() { + unsigned DAGSize = CurrentBlocks.size(); + std::vector<int> WorkList; + + DEBUG(dbgs() << "Topological Sort\n"); + + WorkList.reserve(DAGSize); + TopDownIndex2Block.resize(DAGSize); + TopDownBlock2Index.resize(DAGSize); + BottomUpIndex2Block.resize(DAGSize); + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + unsigned Degree = Block->getSuccs().size(); + TopDownBlock2Index[i] = Degree; + if (Degree == 0) { + WorkList.push_back(i); + } + } + + int Id = DAGSize; + while (!WorkList.empty()) { + int i = WorkList.back(); + SIScheduleBlock *Block = CurrentBlocks[i]; + WorkList.pop_back(); + TopDownBlock2Index[i] = --Id; + TopDownIndex2Block[Id] = i; + for (SIScheduleBlock* Pred : Block->getPreds()) { + if (!--TopDownBlock2Index[Pred->getID()]) + WorkList.push_back(Pred->getID()); + } + } + +#ifndef NDEBUG + // Check correctness of the ordering. + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + for (SIScheduleBlock* Pred : Block->getPreds()) { + assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && + "Wrong Top Down topological sorting"); + } + } +#endif + + BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(), + TopDownIndex2Block.rend()); +} + +void SIScheduleBlockCreator::scheduleInsideBlocks() { + unsigned DAGSize = CurrentBlocks.size(); + + DEBUG(dbgs() << "\nScheduling Blocks\n\n"); + + // We do schedule a valid scheduling such that a Block corresponds + // to a range of instructions. + DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n"); + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + Block->fastSchedule(); + } + + // Note: the following code, and the part restoring previous position + // is by far the most expensive operation of the Scheduler. + + // Do not update CurrentTop. 
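+  // The two topological sorts in this file (over blocks above, over SUs
+  // later) follow the same worklist scheme: seed with zero-successor nodes,
+  // walk predecessor edges, hand out indices from the back. A standalone
+  // sketch over a plain adjacency list, where Preds[i] lists the
+  // predecessors of node i (toy code, not from this patch):
+  //
+  //   #include <vector>
+  //   static std::vector<int>
+  //   toyTopoOrder(const std::vector<std::vector<int>> &Preds) {
+  //     unsigned N = Preds.size();
+  //     std::vector<int> SuccCount(N, 0), Index(N, 0), WorkList;
+  //     for (const auto &Ps : Preds)
+  //       for (int P : Ps)
+  //         ++SuccCount[P];            // count outgoing edges per node
+  //     for (unsigned i = 0; i != N; ++i)
+  //       if (SuccCount[i] == 0)
+  //         WorkList.push_back(i);     // sinks are placed last
+  //     int Id = N;
+  //     while (!WorkList.empty()) {
+  //       int Node = WorkList.back();
+  //       WorkList.pop_back();
+  //       Index[Node] = --Id;
+  //       for (int P : Preds[Node])
+  //         if (--SuccCount[P] == 0)   // all successors placed:
+  //           WorkList.push_back(P);   // the predecessor is ready
+  //     }
+  //     return Index;                  // Index[pred] < Index[succ]
+  //   }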
+  MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
+  std::vector<MachineBasicBlock::iterator> PosOld;
+  std::vector<MachineBasicBlock::iterator> PosNew;
+  PosOld.reserve(DAG->SUnits.size());
+  PosNew.reserve(DAG->SUnits.size());
+
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    int BlockIndice = TopDownIndex2Block[i];
+    SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
+    std::vector<SUnit*> SUs = Block->getScheduledUnits();
+
+    for (SUnit* SU : SUs) {
+      MachineInstr *MI = SU->getInstr();
+      MachineBasicBlock::iterator Pos = MI;
+      PosOld.push_back(Pos);
+      if (&*CurrentTopFastSched == MI) {
+        PosNew.push_back(Pos);
+        CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
+                                          DAG->getCurrentBottom());
+      } else {
+        // Update the instruction stream.
+        DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
+
+        // Update LiveIntervals.
+        // Note: Moving all instructions and calling handleMove every time
+        // is the most CPU-intensive operation of the scheduler.
+        // It would gain a lot if there were a way to recompute the
+        // LiveIntervals for the entire scheduling region.
+        DAG->getLIS()->handleMove(MI, /*UpdateFlags=*/true);
+        PosNew.push_back(CurrentTopFastSched);
+      }
+    }
+  }
+
+  // Now a Block of SUs is also a contiguous block of MIs.
+  // We do the final schedule for the instructions inside the block.
+  // The property that all the SUs of the Block are grouped together as MIs
+  // is used for correct reg usage tracking.
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SIScheduleBlock *Block = CurrentBlocks[i];
+    std::vector<SUnit*> SUs = Block->getScheduledUnits();
+    Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
+  }
+
+  DEBUG(dbgs() << "Restoring MI Pos\n");
+  // Restore old ordering (which prevents a LIS->handleMove bug).
+  for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
+    MachineBasicBlock::iterator POld = PosOld[i-1];
+    MachineBasicBlock::iterator PNew = PosNew[i-1];
+    if (PNew != POld) {
+      // Update the instruction stream.
+      DAG->getBB()->splice(POld, DAG->getBB(), PNew);
+
+      // Update LiveIntervals.
+ DAG->getLIS()->handleMove(POld, /*UpdateFlags=*/true); + } + } + + DEBUG( + for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + Block->printDebug(true); + } + ); +} + +void SIScheduleBlockCreator::fillStats() { + unsigned DAGSize = CurrentBlocks.size(); + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + int BlockIndice = TopDownIndex2Block[i]; + SIScheduleBlock *Block = CurrentBlocks[BlockIndice]; + if (Block->getPreds().size() == 0) + Block->Depth = 0; + else { + unsigned Depth = 0; + for (SIScheduleBlock *Pred : Block->getPreds()) { + if (Depth < Pred->Depth + 1) + Depth = Pred->Depth + 1; + } + Block->Depth = Depth; + } + } + + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + int BlockIndice = BottomUpIndex2Block[i]; + SIScheduleBlock *Block = CurrentBlocks[BlockIndice]; + if (Block->getSuccs().size() == 0) + Block->Height = 0; + else { + unsigned Height = 0; + for (SIScheduleBlock *Succ : Block->getSuccs()) { + if (Height < Succ->Height + 1) + Height = Succ->Height + 1; + } + Block->Height = Height; + } + } +} + +// SIScheduleBlockScheduler // + +SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, + SISchedulerBlockSchedulerVariant Variant, + SIScheduleBlocks BlocksStruct) : + DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks), + LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0), + SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) { + + // Fill the usage of every output + // Warning: while by construction we always have a link between two blocks + // when one needs a result from the other, the number of users of an output + // is not the sum of child blocks having as input the same virtual register. + // Here is an example. A produces x and y. B eats x and produces x'. + // C eats x' and y. The register coalescer may have attributed the same + // virtual register to x and x'. + // To count accurately, we do a topological sort. In case the register is + // found for several parents, we increment the usage of the one with the + // highest topological index. 
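+  // A standalone sketch of that counting rule (toy types, hypothetical
+  // names; not part of this patch): among the predecessors that output Reg,
+  // only the one with the highest topological index is charged with the
+  // use, so a register the coalescer shared between x and x' is not counted
+  // twice.
+  //
+  //   #include <map>
+  //   #include <set>
+  //   #include <vector>
+  //   struct ToyPred { int ID; int TopoIndex; std::set<unsigned> OutRegs; };
+  //   static void chargeLatestProducer(
+  //       std::vector<std::map<unsigned, unsigned>> &NumUsages,
+  //       const std::vector<ToyPred> &Preds, unsigned Reg) {
+  //     int Best = -1, BestTopo = -1;
+  //     for (const ToyPred &P : Preds)
+  //       if (P.OutRegs.count(Reg) && P.TopoIndex > BestTopo) {
+  //         Best = P.ID;          // the later producer is the live one
+  //         BestTopo = P.TopoIndex;
+  //       }
+  //     if (Best >= 0)
+  //       ++NumUsages[Best][Reg]; // operator[] zero-initializes new entries
+  //   }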
+ LiveOutRegsNumUsages.resize(Blocks.size()); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + SIScheduleBlock *Block = Blocks[i]; + for (unsigned Reg : Block->getInRegs()) { + bool Found = false; + int topoInd = -1; + for (SIScheduleBlock* Pred: Block->getPreds()) { + std::set<unsigned> PredOutRegs = Pred->getOutRegs(); + std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg); + + if (RegPos != PredOutRegs.end()) { + Found = true; + if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) { + topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()]; + } + } + } + + if (!Found) + continue; + + int PredID = BlocksStruct.TopDownIndex2Block[topoInd]; + std::map<unsigned, unsigned>::iterator RegPos = + LiveOutRegsNumUsages[PredID].find(Reg); + if (RegPos != LiveOutRegsNumUsages[PredID].end()) { + ++LiveOutRegsNumUsages[PredID][Reg]; + } else { + LiveOutRegsNumUsages[PredID][Reg] = 1; + } + } + } + + LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0); + BlockNumPredsLeft.resize(Blocks.size()); + BlockNumSuccsLeft.resize(Blocks.size()); + + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + SIScheduleBlock *Block = Blocks[i]; + BlockNumPredsLeft[i] = Block->getPreds().size(); + BlockNumSuccsLeft[i] = Block->getSuccs().size(); + } + +#ifndef NDEBUG + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + SIScheduleBlock *Block = Blocks[i]; + assert(Block->getID() == i); + } +#endif + + std::set<unsigned> InRegs = DAG->getInRegs(); + addLiveRegs(InRegs); + + // Fill LiveRegsConsumers for regs that were already + // defined before scheduling. + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + SIScheduleBlock *Block = Blocks[i]; + for (unsigned Reg : Block->getInRegs()) { + bool Found = false; + for (SIScheduleBlock* Pred: Block->getPreds()) { + std::set<unsigned> PredOutRegs = Pred->getOutRegs(); + std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg); + + if (RegPos != PredOutRegs.end()) { + Found = true; + break; + } + } + + if (!Found) { + if (LiveRegsConsumers.find(Reg) == LiveRegsConsumers.end()) + LiveRegsConsumers[Reg] = 1; + else + ++LiveRegsConsumers[Reg]; + } + } + } + + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + SIScheduleBlock *Block = Blocks[i]; + if (BlockNumPredsLeft[i] == 0) { + ReadyBlocks.push_back(Block); + } + } + + while (SIScheduleBlock *Block = pickBlock()) { + BlocksScheduled.push_back(Block); + blockScheduled(Block); + } + + DEBUG( + dbgs() << "Block Order:"; + for (SIScheduleBlock* Block : BlocksScheduled) { + dbgs() << ' ' << Block->getID(); + } + ); +} + +bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand, + SIBlockSchedCandidate &TryCand) { + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return true; + } + + // Try to hide high latencies. + if (tryLess(TryCand.LastPosHighLatParentScheduled, + Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency)) + return true; + // Schedule high latencies early so you can hide them better. 
+  if (tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
+                 TryCand, Cand, Latency))
+    return true;
+  if (TryCand.IsHighLatency && tryGreater(TryCand.Height, Cand.Height,
+                                          TryCand, Cand, Depth))
+    return true;
+  if (tryGreater(TryCand.NumHighLatencySuccessors,
+                 Cand.NumHighLatencySuccessors,
+                 TryCand, Cand, Successor))
+    return true;
+  return false;
+}
+
+bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
+                                                    SIBlockSchedCandidate &TryCand) {
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return true;
+  }
+
+  if (tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
+              TryCand, Cand, RegUsage))
+    return true;
+  if (tryGreater(TryCand.NumSuccessors > 0,
+                 Cand.NumSuccessors > 0,
+                 TryCand, Cand, Successor))
+    return true;
+  if (tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
+    return true;
+  if (tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
+              TryCand, Cand, RegUsage))
+    return true;
+  return false;
+}
+
+SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
+  SIBlockSchedCandidate Cand;
+  std::vector<SIScheduleBlock*>::iterator Best;
+  SIScheduleBlock *Block;
+  if (ReadyBlocks.empty())
+    return nullptr;
+
+  DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
+                        VregCurrentUsage, SregCurrentUsage);
+  if (VregCurrentUsage > maxVregUsage)
+    maxVregUsage = VregCurrentUsage;
+  if (SregCurrentUsage > maxSregUsage)
+    maxSregUsage = SregCurrentUsage;
+  DEBUG(
+    dbgs() << "Picking New Blocks\n";
+    dbgs() << "Available: ";
+    for (SIScheduleBlock* Block : ReadyBlocks)
+      dbgs() << Block->getID() << ' ';
+    dbgs() << "\nCurrent Live:\n";
+    for (unsigned Reg : LiveRegs)
+      dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+    dbgs() << '\n';
+    dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
+    dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';
+  );
+
+  Cand.Block = nullptr;
+  for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
+       E = ReadyBlocks.end(); I != E; ++I) {
+    SIBlockSchedCandidate TryCand;
+    TryCand.Block = *I;
+    TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
+    TryCand.VGPRUsageDiff =
+      checkRegUsageImpact(TryCand.Block->getInRegs(),
+                          TryCand.Block->getOutRegs())[DAG->getVGPRSetID()];
+    TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
+    TryCand.NumHighLatencySuccessors =
+      TryCand.Block->getNumHighLatencySuccessors();
+    TryCand.LastPosHighLatParentScheduled =
+      (unsigned int) std::max<int> (0,
+         LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
+           LastPosWaitedHighLatency);
+    TryCand.Height = TryCand.Block->Height;
+    // Try not to increase VGPR usage too much, else we may spill.
+    if (VregCurrentUsage > 120 ||
+        Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
+      if (!tryCandidateRegUsage(Cand, TryCand) &&
+          Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
+        tryCandidateLatency(Cand, TryCand);
+    } else {
+      if (!tryCandidateLatency(Cand, TryCand))
+        tryCandidateRegUsage(Cand, TryCand);
+    }
+    if (TryCand.Reason != NoCand) {
+      Cand.setBest(TryCand);
+      Best = I;
+      DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
+                   << getReasonStr(Cand.Reason) << '\n');
+    }
+  }
+
+  DEBUG(
+    dbgs() << "Picking: " << Cand.Block->getID() << '\n';
+    dbgs() << "Is a block with high latency instruction: "
+           << (Cand.IsHighLatency ?
"yes\n" : "no\n"); + dbgs() << "Position of last high latency dependency: " + << Cand.LastPosHighLatParentScheduled << '\n'; + dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n'; + dbgs() << '\n'; + ); + + Block = Cand.Block; + ReadyBlocks.erase(Best); + return Block; +} + +// Tracking of currently alive registers to determine VGPR Usage. + +void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) { + for (unsigned Reg : Regs) { + // For now only track virtual registers. + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // If not already in the live set, then add it. + (void) LiveRegs.insert(Reg); + } +} + +void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block, + std::set<unsigned> &Regs) { + for (unsigned Reg : Regs) { + // For now only track virtual registers. + std::set<unsigned>::iterator Pos = LiveRegs.find(Reg); + assert (Pos != LiveRegs.end() && // Reg must be live. + LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && + LiveRegsConsumers[Reg] >= 1); + --LiveRegsConsumers[Reg]; + if (LiveRegsConsumers[Reg] == 0) + LiveRegs.erase(Pos); + } +} + +void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) { + for (SIScheduleBlock* Block : Parent->getSuccs()) { + --BlockNumPredsLeft[Block->getID()]; + if (BlockNumPredsLeft[Block->getID()] == 0) { + ReadyBlocks.push_back(Block); + } + // TODO: Improve check. When the dependency between the high latency + // instructions and the instructions of the other blocks are WAR or WAW + // there will be no wait triggered. We would like these cases to not + // update LastPosHighLatencyParentScheduled. + if (Parent->isHighLatencyBlock()) + LastPosHighLatencyParentScheduled[Block->getID()] = NumBlockScheduled; + } +} + +void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) { + decreaseLiveRegs(Block, Block->getInRegs()); + addLiveRegs(Block->getOutRegs()); + releaseBlockSuccs(Block); + for (std::map<unsigned, unsigned>::iterator RegI = + LiveOutRegsNumUsages[Block->getID()].begin(), + E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) { + std::pair<unsigned, unsigned> RegP = *RegI; + if (LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end()) + LiveRegsConsumers[RegP.first] = RegP.second; + else { + assert(LiveRegsConsumers[RegP.first] == 0); + LiveRegsConsumers[RegP.first] += RegP.second; + } + } + if (LastPosHighLatencyParentScheduled[Block->getID()] > + (unsigned)LastPosWaitedHighLatency) + LastPosWaitedHighLatency = + LastPosHighLatencyParentScheduled[Block->getID()]; + ++NumBlockScheduled; +} + +std::vector<int> +SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs, + std::set<unsigned> &OutRegs) { + std::vector<int> DiffSetPressure; + DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0); + + for (unsigned Reg : InRegs) { + // For now only track virtual registers. + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (LiveRegsConsumers[Reg] > 1) + continue; + PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); + for (; PSetI.isValid(); ++PSetI) { + DiffSetPressure[*PSetI] -= PSetI.getWeight(); + } + } + + for (unsigned Reg : OutRegs) { + // For now only track virtual registers. 
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); + for (; PSetI.isValid(); ++PSetI) { + DiffSetPressure[*PSetI] += PSetI.getWeight(); + } + } + + return DiffSetPressure; +} + +// SIScheduler // + +struct SIScheduleBlockResult +SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, + SISchedulerBlockSchedulerVariant ScheduleVariant) { + SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant); + SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks); + std::vector<SIScheduleBlock*> ScheduledBlocks; + struct SIScheduleBlockResult Res; + + ScheduledBlocks = Scheduler.getBlocks(); + + for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) { + SIScheduleBlock *Block = ScheduledBlocks[b]; + std::vector<SUnit*> SUs = Block->getScheduledUnits(); + + for (SUnit* SU : SUs) + Res.SUs.push_back(SU->NodeNum); + } + + Res.MaxSGPRUsage = Scheduler.getSGPRUsage(); + Res.MaxVGPRUsage = Scheduler.getVGPRUsage(); + return Res; +} + +// SIScheduleDAGMI // + +SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) : + ScheduleDAGMILive(C, make_unique<GenericScheduler>(C)) { + SITII = static_cast<const SIInstrInfo*>(TII); + SITRI = static_cast<const SIRegisterInfo*>(TRI); + + VGPRSetID = SITRI->getVGPR32PressureSet(); + SGPRSetID = SITRI->getSGPR32PressureSet(); +} + +SIScheduleDAGMI::~SIScheduleDAGMI() { +} + +ScheduleDAGInstrs *llvm::createSIMachineScheduler(MachineSchedContext *C) { + return new SIScheduleDAGMI(C); +} + +// Code adapted from scheduleDAG.cpp +// Does a topological sort over the SUs. +// Both TopDown and BottomUp +void SIScheduleDAGMI::topologicalSort() { + std::vector<int> TopDownSU2Index; + unsigned DAGSize = SUnits.size(); + std::vector<SUnit*> WorkList; + + DEBUG(dbgs() << "Topological Sort\n"); + WorkList.reserve(DAGSize); + + TopDownIndex2SU.resize(DAGSize); + TopDownSU2Index.resize(DAGSize); + BottomUpIndex2SU.resize(DAGSize); + + WorkList.push_back(&getExitSU()); + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + int NodeNum = SU->NodeNum; + unsigned Degree = SU->Succs.size(); + TopDownSU2Index[NodeNum] = Degree; + if (Degree == 0) { + assert(SU->Succs.empty() && "SUnit should have no successors"); + WorkList.push_back(SU); + } + } + + int Id = DAGSize; + while (!WorkList.empty()) { + SUnit *SU = WorkList.back(); + WorkList.pop_back(); + if (SU->NodeNum < DAGSize) { + TopDownSU2Index[SU->NodeNum] = --Id; + TopDownIndex2SU[Id] = SU->NodeNum; + } + for (SDep& Pred : SU->Preds) { + SUnit *SU = Pred.getSUnit(); + if (SU->NodeNum < DAGSize && !--TopDownSU2Index[SU->NodeNum]) + WorkList.push_back(SU); + } + } + + BottomUpIndex2SU = std::vector<int>(TopDownIndex2SU.rbegin(), + TopDownIndex2SU.rend()); + +#ifndef NDEBUG + // Check correctness of the ordering + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + for (SDep& Pred : SU->Preds) { + if (Pred.getSUnit()->NodeNum >= DAGSize) + continue; + assert(TopDownSU2Index[SU->NodeNum] > + TopDownSU2Index[Pred.getSUnit()->NodeNum] && + "Wrong Top Down topological sorting"); + } + } + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + for (SDep& Succ : SU->Succs) { + if (Succ.getSUnit()->NodeNum >= DAGSize) + continue; + assert(TopDownSU2Index[SU->NodeNum] < + TopDownSU2Index[Succ.getSUnit()->NodeNum] && + "Wrong Bottom Up topological sorting"); + } + } +#endif +} + +// Move low latencies further from their user without +// increasing SGPR usage (in general) +// 
This is to be replaced by a better pass that would
+// take into account SGPR usage (based on VGPR usage
+// and the corresponding wavefront count), that would
+// try to merge groups of loads if it makes sense, etc.
+void SIScheduleDAGMI::moveLowLatencies() {
+  unsigned DAGSize = SUnits.size();
+  int LastLowLatencyUser = -1;
+  int LastLowLatencyPos = -1;
+
+  for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
+    SUnit *SU = &SUnits[ScheduledSUnits[i]];
+    bool IsLowLatencyUser = false;
+    unsigned MinPos = 0;
+
+    for (SDep& PredDep : SU->Preds) {
+      SUnit *Pred = PredDep.getSUnit();
+      if (SITII->isLowLatencyInstruction(Pred->getInstr())) {
+        IsLowLatencyUser = true;
+      }
+      if (Pred->NodeNum >= DAGSize)
+        continue;
+      unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
+      if (PredPos >= MinPos)
+        MinPos = PredPos + 1;
+    }
+
+    if (SITII->isLowLatencyInstruction(SU->getInstr())) {
+      unsigned BestPos = LastLowLatencyUser + 1;
+      if ((int)BestPos <= LastLowLatencyPos)
+        BestPos = LastLowLatencyPos + 1;
+      if (BestPos < MinPos)
+        BestPos = MinPos;
+      if (BestPos < i) {
+        for (unsigned u = i; u > BestPos; --u) {
+          ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
+          ScheduledSUnits[u] = ScheduledSUnits[u-1];
+        }
+        ScheduledSUnits[BestPos] = SU->NodeNum;
+        ScheduledSUnitsInv[SU->NodeNum] = BestPos;
+      }
+      LastLowLatencyPos = BestPos;
+      if (IsLowLatencyUser)
+        LastLowLatencyUser = BestPos;
+    } else if (IsLowLatencyUser) {
+      LastLowLatencyUser = i;
+    // Also move the COPY instructions that the low latency
+    // instructions depend on.
+    } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
+      bool CopyForLowLat = false;
+      for (SDep& SuccDep : SU->Succs) {
+        SUnit *Succ = SuccDep.getSUnit();
+        if (SITII->isLowLatencyInstruction(Succ->getInstr())) {
+          CopyForLowLat = true;
+        }
+      }
+      if (!CopyForLowLat)
+        continue;
+      if (MinPos < i) {
+        for (unsigned u = i; u > MinPos; --u) {
+          ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
+          ScheduledSUnits[u] = ScheduledSUnits[u-1];
+        }
+        ScheduledSUnits[MinPos] = SU->NodeNum;
+        ScheduledSUnitsInv[SU->NodeNum] = MinPos;
+      }
+    }
+  }
+}
+
+void SIScheduleDAGMI::restoreSULinksLeft() {
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    SUnits[i].isScheduled = false;
+    SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
+    SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
+    SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
+    SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
+  }
+}
+
+// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
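(That per-register usage, here and in checkRegUsageImpact above, comes from
the MachineRegisterInfo pressure-set decomposition. A minimal sketch of the
idiom; the helper name is ours, not part of the patch:)

    // Accumulate the weight one virtual register contributes to each
    // register pressure set it belongs to.
    static void addRegToPressure(unsigned Reg, const MachineRegisterInfo &MRI,
                                 std::vector<unsigned> &Pressure) {
      if (!TargetRegisterInfo::isVirtualRegister(Reg))
        return; // Physical registers are not tracked here.
      for (PSetIterator PSetI = MRI.getPressureSets(Reg); PSetI.isValid();
           ++PSetI)
        Pressure[*PSetI] += PSetI.getWeight(); // One set index per iteration.
    }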
+template<typename _Iterator> void
+SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
+                                  unsigned &VgprUsage, unsigned &SgprUsage) {
+  VgprUsage = 0;
+  SgprUsage = 0;
+  for (_Iterator RegI = First; RegI != End; ++RegI) {
+    unsigned Reg = *RegI;
+    // For now only track virtual registers.
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    PSetIterator PSetI = MRI.getPressureSets(Reg);
+    for (; PSetI.isValid(); ++PSetI) {
+      if (*PSetI == VGPRSetID)
+        VgprUsage += PSetI.getWeight();
+      else if (*PSetI == SGPRSetID)
+        SgprUsage += PSetI.getWeight();
+    }
+  }
+}
+
+void SIScheduleDAGMI::schedule()
+{
+  SmallVector<SUnit*, 8> TopRoots, BotRoots;
+  SIScheduleBlockResult Best, Temp;
+  DEBUG(dbgs() << "Preparing Scheduling\n");
+
+  buildDAGWithRegPressure();
+  DEBUG(
+    for(SUnit& SU : SUnits)
+      SU.dumpAll(this)
+  );
+
+  Topo.InitDAGTopologicalSorting();
+  topologicalSort();
+  findRootsAndBiasEdges(TopRoots, BotRoots);
+  // We reuse several ScheduleDAGMI and ScheduleDAGMILive
+  // functions, but to make them happy we must initialize
+  // the default Scheduler implementation (even if we do not
+  // run it).
+  SchedImpl->initialize(this);
+  initQueues(TopRoots, BotRoots);
+
+  // Fill some stats to help scheduling.
+
+  SUnitsLinksBackup = SUnits;
+  IsLowLatencySU.clear();
+  LowLatencyOffset.clear();
+  IsHighLatencySU.clear();
+
+  IsLowLatencySU.resize(SUnits.size(), 0);
+  LowLatencyOffset.resize(SUnits.size(), 0);
+  IsHighLatencySU.resize(SUnits.size(), 0);
+
+  for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    unsigned BaseLatReg, OffLatReg;
+    if (SITII->isLowLatencyInstruction(SU->getInstr())) {
+      IsLowLatencySU[i] = 1;
+      if (SITII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseLatReg,
+                                       OffLatReg, TRI))
+        LowLatencyOffset[i] = OffLatReg;
+    } else if (SITII->isHighLatencyInstruction(SU->getInstr()))
+      IsHighLatencySU[i] = 1;
+  }
+
+  SIScheduler Scheduler(this);
+  Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
+                                   SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
+#if 0 // To enable when handleMove fix lands
+  // If VGPR usage is extremely high, try other well-performing variants,
+  // which could lead to lower VGPR usage.
+  if (Best.MaxVGPRUsage > 180) {
+    std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
+      { LatenciesAlone, BlockRegUsageLatency },
+//      { LatenciesAlone, BlockRegUsage },
+      { LatenciesGrouped, BlockLatencyRegUsage },
+//      { LatenciesGrouped, BlockRegUsageLatency },
+//      { LatenciesGrouped, BlockRegUsage },
+      { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
+//      { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
+//      { LatenciesAlonePlusConsecutive, BlockRegUsage }
+    };
+    for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
+      Temp = Scheduler.scheduleVariant(v.first, v.second);
+      if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
+        Best = Temp;
+    }
+  }
+  // If VGPR usage is still extremely high, we may spill. Try other, lower
+  // performing variants that could lead to lower VGPR usage.
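+  // (Context, not in the original patch: these thresholds roughly track GCN
+  // occupancy. Each SIMD has a 256-entry VGPR file, so a wavefront using V
+  // VGPRs allows about floor(256 / V) waves in flight, ignoring allocation
+  // granularity. At 180 or 200 VGPRs only one wave fits, so latency hiding
+  // is already lost and the 256-VGPR spill limit is close.)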
+ if (Best.MaxVGPRUsage > 200) { + std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = { +// { LatenciesAlone, BlockRegUsageLatency }, + { LatenciesAlone, BlockRegUsage }, +// { LatenciesGrouped, BlockLatencyRegUsage }, + { LatenciesGrouped, BlockRegUsageLatency }, + { LatenciesGrouped, BlockRegUsage }, +// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage }, + { LatenciesAlonePlusConsecutive, BlockRegUsageLatency }, + { LatenciesAlonePlusConsecutive, BlockRegUsage } + }; + for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) { + Temp = Scheduler.scheduleVariant(v.first, v.second); + if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage) + Best = Temp; + } + } +#endif + ScheduledSUnits = Best.SUs; + ScheduledSUnitsInv.resize(SUnits.size()); + + for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) { + ScheduledSUnitsInv[ScheduledSUnits[i]] = i; + } + + moveLowLatencies(); + + // Tell the outside world about the result of the scheduling. + + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); + TopRPTracker.setPos(CurrentTop); + + for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(), + E = ScheduledSUnits.end(); I != E; ++I) { + SUnit *SU = &SUnits[*I]; + + scheduleMI(SU, true); + + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " + << *SU->getInstr()); + } + + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); + + DEBUG({ + unsigned BBNum = begin()->getParent()->getNumber(); + dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h new file mode 100644 index 0000000..b270136 --- /dev/null +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h @@ -0,0 +1,489 @@ +//===-- SIMachineScheduler.h - SI Scheduler Interface -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief SI Machine Scheduler interface +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H +#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H + +#include "SIInstrInfo.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/RegisterPressure.h" + +using namespace llvm; + +namespace llvm { + +enum SIScheduleCandReason { + NoCand, + RegUsage, + Latency, + Successor, + Depth, + NodeOrder +}; + +struct SISchedulerCandidate { + // The reason for this candidate. + SIScheduleCandReason Reason; + + // Set of reasons that apply to multiple candidates. + uint32_t RepeatReasonSet; + + SISchedulerCandidate() + : Reason(NoCand), RepeatReasonSet(0) {} + + bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); } + void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); } +}; + +class SIScheduleDAGMI; +class SIScheduleBlockCreator; + +class SIScheduleBlock { + SIScheduleDAGMI *DAG; + SIScheduleBlockCreator *BC; + + std::vector<SUnit*> SUnits; + std::map<unsigned, unsigned> NodeNum2Index; + std::vector<SUnit*> TopReadySUs; + std::vector<SUnit*> ScheduledSUnits; + + /// The top of the unscheduled zone. 
+  IntervalPressure TopPressure;
+  RegPressureTracker TopRPTracker;
+
+  // Pressure: number of registers of a given class needed to hold
+  // the live virtual and physical registers.
+  // We only care about SGPR32 and VGPR32, and only track virtual registers.
+  // Pressure of additional registers required inside the block.
+  std::vector<unsigned> InternalAdditionnalPressure;
+  // Pressure of input and output registers.
+  std::vector<unsigned> LiveInPressure;
+  std::vector<unsigned> LiveOutPressure;
+  // Registers required by the block, and outputs.
+  // We only track virtual registers.
+  // Note that some registers are not 32 bits,
+  // and thus the pressure is not equal
+  // to the number of live registers.
+  std::set<unsigned> LiveInRegs;
+  std::set<unsigned> LiveOutRegs;
+
+  bool Scheduled;
+  bool HighLatencyBlock;
+
+  std::vector<unsigned> HasLowLatencyNonWaitedParent;
+
+  // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
+  unsigned ID;
+
+  std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
+  std::vector<SIScheduleBlock*> Succs;  // All blocks successors.
+  unsigned NumHighLatencySuccessors;
+
+public:
+  SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
+                  unsigned ID):
+    DAG(DAG), BC(BC), SUnits(), TopReadySUs(), ScheduledSUnits(),
+    TopRPTracker(TopPressure), Scheduled(false),
+    HighLatencyBlock(false), ID(ID),
+    Preds(), Succs(), NumHighLatencySuccessors(0) {};
+
+  ~SIScheduleBlock() {};
+
+  unsigned getID() const { return ID; }
+
+  /// Functions for Block construction.
+  void addUnit(SUnit *SU);
+
+  // When all SUs have been added.
+  void finalizeUnits();
+
+  // Add a block predecessor, i.e. a block containing an instruction
+  // predecessor of an SU in this block.
+  void addPred(SIScheduleBlock *Pred);
+  void addSucc(SIScheduleBlock *Succ);
+
+  const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
+  const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
+
+  unsigned Height;  // Maximum topdown path length to block without outputs
+  unsigned Depth;   // Maximum bottomup path length to block without inputs
+
+  unsigned getNumHighLatencySuccessors() const {
+    return NumHighLatencySuccessors;
+  }
+
+  bool isHighLatencyBlock() { return HighLatencyBlock; }
+
+  // This is approximate.
+  // Ideally it should take into account that some instructions (rcp, etc.)
+  // are 4 times slower.
+  int getCost() { return SUnits.size(); }
+
+  // All the block's predecessors and successors must be registered
+  // before fastSchedule().
+  // Fast schedule with no particular requirement.
+  void fastSchedule();
+
+  std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
+
+  // Complete schedule that tries to minimize register pressure and to hide
+  // the latency of low latency instructions; fills live-ins and live-outs.
+  // Needs all MIs to be grouped between BeginBlock and EndBlock.
+  // The MIs can be moved after the scheduling; the grouping is only
+  // needed to track the live registers correctly.
+  void schedule(MachineBasicBlock::iterator BeginBlock,
+                MachineBasicBlock::iterator EndBlock);
+
+  bool isScheduled() { return Scheduled; }
+
+  // Needs the block to have been scheduled first.
+  // TODO: find a way to compute it.
+  std::vector<unsigned> &getInternalAdditionnalRegUsage() {
+    return InternalAdditionnalPressure;
+  }
+
+  std::set<unsigned> &getInRegs() { return LiveInRegs; }
+  std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
+
+  void printDebug(bool Full);
+
+private:
+  struct SISchedCandidate : SISchedulerCandidate {
+    // The best SUnit candidate.
+ SUnit *SU; + + unsigned SGPRUsage; + unsigned VGPRUsage; + bool IsLowLatency; + unsigned LowLatencyOffset; + bool HasLowLatencyNonWaitedParent; + + SISchedCandidate() + : SU(nullptr) {} + + bool isValid() const { return SU; } + + // Copy the status of another candidate without changing policy. + void setBest(SISchedCandidate &Best) { + assert(Best.Reason != NoCand && "uninitialized Sched candidate"); + SU = Best.SU; + Reason = Best.Reason; + SGPRUsage = Best.SGPRUsage; + VGPRUsage = Best.VGPRUsage; + IsLowLatency = Best.IsLowLatency; + LowLatencyOffset = Best.LowLatencyOffset; + HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent; + } + }; + + void undoSchedule(); + + void undoReleaseSucc(SUnit *SU, SDep *SuccEdge); + void releaseSucc(SUnit *SU, SDep *SuccEdge); + // InOrOutBlock: restrict to links pointing inside the block (true), + // or restrict to links pointing outside the block (false). + void releaseSuccessors(SUnit *SU, bool InOrOutBlock); + + void nodeScheduled(SUnit *SU); + void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand); + void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand); + SUnit* pickNode(); + void traceCandidate(const SISchedCandidate &Cand); + void initRegPressure(MachineBasicBlock::iterator BeginBlock, + MachineBasicBlock::iterator EndBlock); +}; + +struct SIScheduleBlocks { + std::vector<SIScheduleBlock*> Blocks; + std::vector<int> TopDownIndex2Block; + std::vector<int> TopDownBlock2Index; +}; + +enum SISchedulerBlockCreatorVariant { + LatenciesAlone, + LatenciesGrouped, + LatenciesAlonePlusConsecutive +}; + +class SIScheduleBlockCreator { + SIScheduleDAGMI *DAG; + // unique_ptr handles freeing memory for us. + std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs; + std::map<SISchedulerBlockCreatorVariant, + SIScheduleBlocks> Blocks; + std::vector<SIScheduleBlock*> CurrentBlocks; + std::vector<int> Node2CurrentBlock; + + // Topological sort + // Maps topological index to the node number. + std::vector<int> TopDownIndex2Block; + std::vector<int> TopDownBlock2Index; + std::vector<int> BottomUpIndex2Block; + + // 0 -> Color not given. + // 1 to SUnits.size() -> Reserved group (you should only add elements to them). + // Above -> Other groups. + int NextReservedID; + int NextNonReservedID; + std::vector<int> CurrentColoring; + std::vector<int> CurrentTopDownReservedDependencyColoring; + std::vector<int> CurrentBottomUpReservedDependencyColoring; + +public: + SIScheduleBlockCreator(SIScheduleDAGMI *DAG); + ~SIScheduleBlockCreator(); + + SIScheduleBlocks + getBlocks(SISchedulerBlockCreatorVariant BlockVariant); + + bool isSUInBlock(SUnit *SU, unsigned ID); + +private: + // Give a Reserved color to every high latency. + void colorHighLatenciesAlone(); + + // Create groups of high latencies with a Reserved color. + void colorHighLatenciesGroups(); + + // Compute coloring for topdown and bottom traversals with + // different colors depending on dependencies on Reserved colors. + void colorComputeReservedDependencies(); + + // Give color to all non-colored SUs according to Reserved groups dependencies. + void colorAccordingToReservedDependencies(); + + // Divides Blocks having no bottom up or top down dependencies on Reserved groups. + // The new colors are computed according to the dependencies on the other blocks + // formed with colorAccordingToReservedDependencies. + void colorEndsAccordingToDependencies(); + + // Cut groups into groups with SUs in consecutive order (except for Reserved groups). 
+  void colorForceConsecutiveOrderInGroup();
+
+  // Merge constant loads whose users all belong to another group
+  // into that group.
+  // (TODO: else if all their users depend on the same group, put them there.)
+  void colorMergeConstantLoadsNextGroup();
+
+  // Merge SUs whose users all belong to another group into that group.
+  void colorMergeIfPossibleNextGroup();
+
+  // Merge SUs whose users all belong to another group into that group,
+  // but only for Reserved groups.
+  void colorMergeIfPossibleNextGroupOnlyForReserved();
+
+  // Merge SUs whose users all belong to another group into that group,
+  // but only if the group is no more than a few SUs.
+  void colorMergeIfPossibleSmallGroupsToNextGroup();
+
+  // Divide blocks that are too large.
+  // Implementation idea: attribute new colors depending on the topdown
+  // and bottom up links to other blocks.
+  void cutHugeBlocks();
+
+  // Put all instructions with no users in this scheduling region
+  // into one group (we want these groups to be at the end).
+  void regroupNoUserInstructions();
+
+  void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
+
+  void topologicalSort();
+
+  void scheduleInsideBlocks();
+
+  void fillStats();
+};
+
+enum SISchedulerBlockSchedulerVariant {
+  BlockLatencyRegUsage,
+  BlockRegUsageLatency,
+  BlockRegUsage
+};
+
+class SIScheduleBlockScheduler {
+  SIScheduleDAGMI *DAG;
+  SISchedulerBlockSchedulerVariant Variant;
+  std::vector<SIScheduleBlock*> Blocks;
+
+  std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
+  std::set<unsigned> LiveRegs;
+  // Number of schedulable unscheduled blocks reading the register.
+  std::map<unsigned, unsigned> LiveRegsConsumers;
+
+  std::vector<unsigned> LastPosHighLatencyParentScheduled;
+  int LastPosWaitedHighLatency;
+
+  std::vector<SIScheduleBlock*> BlocksScheduled;
+  unsigned NumBlockScheduled;
+  std::vector<SIScheduleBlock*> ReadyBlocks;
+
+  unsigned VregCurrentUsage;
+  unsigned SregCurrentUsage;
+
+  // Currently only an approximation.
+  unsigned maxVregUsage;
+  unsigned maxSregUsage;
+
+  std::vector<unsigned> BlockNumPredsLeft;
+  std::vector<unsigned> BlockNumSuccsLeft;
+
+public:
+  SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
+                           SISchedulerBlockSchedulerVariant Variant,
+                           SIScheduleBlocks BlocksStruct);
+  ~SIScheduleBlockScheduler() {};
+
+  std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; };
+
+  unsigned getVGPRUsage() { return maxVregUsage; };
+  unsigned getSGPRUsage() { return maxSregUsage; };
+
+private:
+  struct SIBlockSchedCandidate : SISchedulerCandidate {
+    // The best Block candidate.
+    SIScheduleBlock *Block;
+
+    bool IsHighLatency;
+    int VGPRUsageDiff;
+    unsigned NumSuccessors;
+    unsigned NumHighLatencySuccessors;
+    unsigned LastPosHighLatParentScheduled;
+    unsigned Height;
+
+    SIBlockSchedCandidate()
+      : Block(nullptr) {}
+
+    bool isValid() const { return Block; }
+
+    // Copy the status of another candidate without changing policy.
+ void setBest(SIBlockSchedCandidate &Best) { + assert(Best.Reason != NoCand && "uninitialized Sched candidate"); + Block = Best.Block; + Reason = Best.Reason; + IsHighLatency = Best.IsHighLatency; + VGPRUsageDiff = Best.VGPRUsageDiff; + NumSuccessors = Best.NumSuccessors; + NumHighLatencySuccessors = Best.NumHighLatencySuccessors; + LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled; + Height = Best.Height; + } + }; + + bool tryCandidateLatency(SIBlockSchedCandidate &Cand, + SIBlockSchedCandidate &TryCand); + bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand, + SIBlockSchedCandidate &TryCand); + SIScheduleBlock *pickBlock(); + + void addLiveRegs(std::set<unsigned> &Regs); + void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs); + void releaseBlockSuccs(SIScheduleBlock *Parent); + void blockScheduled(SIScheduleBlock *Block); + + // Check register pressure change + // by scheduling a block with these LiveIn and LiveOut. + std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs, + std::set<unsigned> &OutRegs); + + void schedule(); +}; + +struct SIScheduleBlockResult { + std::vector<unsigned> SUs; + unsigned MaxSGPRUsage; + unsigned MaxVGPRUsage; +}; + +class SIScheduler { + SIScheduleDAGMI *DAG; + SIScheduleBlockCreator BlockCreator; + +public: + SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}; + + ~SIScheduler() {}; + + struct SIScheduleBlockResult + scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, + SISchedulerBlockSchedulerVariant ScheduleVariant); +}; + +class SIScheduleDAGMI : public ScheduleDAGMILive { + const SIInstrInfo *SITII; + const SIRegisterInfo *SITRI; + + std::vector<SUnit> SUnitsLinksBackup; + + // For moveLowLatencies. After all Scheduling variants are tested. + std::vector<unsigned> ScheduledSUnits; + std::vector<unsigned> ScheduledSUnitsInv; + + unsigned VGPRSetID; + unsigned SGPRSetID; + +public: + SIScheduleDAGMI(MachineSchedContext *C); + + ~SIScheduleDAGMI() override; + + // Entry point for the schedule. + void schedule() override; + + // To init Block's RPTracker. + void initRPTracker(RegPressureTracker &RPTracker) { + RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); + } + + MachineBasicBlock *getBB() { return BB; } + MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }; + MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }; + LiveIntervals *getLIS() { return LIS; } + MachineRegisterInfo *getMRI() { return &MRI; } + const TargetRegisterInfo *getTRI() { return TRI; } + SUnit& getEntrySU() { return EntrySU; }; + SUnit& getExitSU() { return ExitSU; }; + + void restoreSULinksLeft(); + + template<typename _Iterator> void fillVgprSgprCost(_Iterator First, + _Iterator End, + unsigned &VgprUsage, + unsigned &SgprUsage); + std::set<unsigned> getInRegs() { + std::set<unsigned> InRegs (RPTracker.getPressure().LiveInRegs.begin(), + RPTracker.getPressure().LiveInRegs.end()); + return InRegs; + }; + + unsigned getVGPRSetID() const { return VGPRSetID; } + unsigned getSGPRSetID() const { return SGPRSetID; } + +private: + void topologicalSort(); + // After scheduling is done, improve low latency placements. + void moveLowLatencies(); + +public: + // Some stats for scheduling inside blocks. + std::vector<unsigned> IsLowLatencySU; + std::vector<unsigned> LowLatencyOffset; + std::vector<unsigned> IsHighLatencySU; + // Topological sort + // Maps topological index to the node number. 
+ std::vector<int> TopDownIndex2SU; + std::vector<int> BottomUpIndex2SU; +}; + +} // namespace llvm + +#endif /* SIMACHINESCHEDULER_H_ */ diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 2afa009..609f5e7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -23,7 +23,20 @@ using namespace llvm; -SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {} +SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() { + unsigned NumRegPressureSets = getNumRegPressureSets(); + + SGPR32SetID = NumRegPressureSets; + VGPR32SetID = NumRegPressureSets; + for (unsigned i = 0; i < NumRegPressureSets; ++i) { + if (strncmp("SGPR_32", getRegPressureSetName(i), 7) == 0) + SGPR32SetID = i; + else if (strncmp("VGPR_32", getRegPressureSetName(i), 7) == 0) + VGPR32SetID = i; + } + assert(SGPR32SetID < NumRegPressureSets && + VGPR32SetID < NumRegPressureSets); +} void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { MCRegAliasIterator R(Reg, this, true); @@ -36,18 +49,15 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); if (ST.hasSGPRInitBug()) { - unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; - if (ST.isXNACKEnabled()) - BaseIdx -= 4; - + // Leave space for flat_scr, xnack_mask, vcc, and alignment + unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 8 - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and - // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down - // either way. + // 96/97 need to be reserved for flat_scr, 98/99 for xnack_mask, and + // 100/101 for vcc. This is the next sgpr128 down. return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; } @@ -58,25 +68,14 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); if (ST.hasSGPRInitBug()) { - unsigned Idx; - - if (!ST.isXNACKEnabled()) - Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; - else - Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; - + unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; return AMDGPU::SGPR_32RegClass.getRegister(Idx); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - if (!ST.isXNACKEnabled()) { - // Next register before reservations for flat_scr and vcc. - return AMDGPU::SGPR97; - } else { - // Next register before reservations for flat_scr, xnack_mask, vcc, - // and scratch resource. - return AMDGPU::SGPR91; - } + // Next register before reservations for flat_scr, xnack_mask, vcc, + // and scratch resource. + return AMDGPU::SGPR91; } return AMDGPU::SGPR95; @@ -99,23 +98,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { // SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation - // for VCC/FLAT_SCR. + // for VCC/XNACK_MASK/FLAT_SCR. + // + // TODO The SGPRs that alias to XNACK_MASK could be used as general purpose + // SGPRs when the XNACK feature is not used. 
This is currently not done + // because the code that counts SGPRs cannot account for such holes. + reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); - - if (ST.isXNACKEnabled()) - reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); } // Tonga and Iceland can only allocate a fixed number of SGPRs due // to a hw bug. if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); - // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). - unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; - - if (ST.isXNACKEnabled()) - Limit -= 2; + // Reserve some SGPRs for FLAT_SCRATCH, XNACK_MASK, and VCC (6 SGPRs). + unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6; for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); @@ -479,12 +477,38 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( if (SubIdx == AMDGPU::NoSubRegister) return RC; - // If this register has a sub-register, we can safely assume it is a 32-bit - // register, because all of SI's sub-registers are 32-bit. + // We can assume that each lane corresponds to one 32-bit register. + unsigned Count = countPopulation(getSubRegIndexLaneMask(SubIdx)); if (isSGPRClass(RC)) { - return &AMDGPU::SGPR_32RegClass; + switch (Count) { + case 1: + return &AMDGPU::SGPR_32RegClass; + case 2: + return &AMDGPU::SReg_64RegClass; + case 4: + return &AMDGPU::SReg_128RegClass; + case 8: + return &AMDGPU::SReg_256RegClass; + case 16: /* fall-through */ + default: + llvm_unreachable("Invalid sub-register class size"); + } } else { - return &AMDGPU::VGPR_32RegClass; + switch (Count) { + case 1: + return &AMDGPU::VGPR_32RegClass; + case 2: + return &AMDGPU::VReg_64RegClass; + case 3: + return &AMDGPU::VReg_96RegClass; + case 4: + return &AMDGPU::VReg_128RegClass; + case 8: + return &AMDGPU::VReg_256RegClass; + case 16: /* fall-through */ + default: + llvm_unreachable("Invalid sub-register class size"); + } } } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 1795237..9410e20 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -25,6 +25,9 @@ namespace llvm { struct SIRegisterInfo : public AMDGPURegisterInfo { private: + unsigned SGPR32SetID; + unsigned VGPR32SetID; + void reserveRegisterTuples(BitVector &, unsigned Reg) const; public: @@ -146,6 +149,9 @@ public: unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC) const; + unsigned getSGPR32PressureSet() const { return SGPR32SetID; }; + unsigned getVGPR32PressureSet() const { return VGPR32SetID; }; + private: void buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, unsigned Value, diff --git a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp index dbdc76b..d36c5d2 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp @@ -98,6 +98,9 @@ void SITypeRewriter::visitCallInst(CallInst &I) { SmallVector <Type*, 8> Types; bool NeedToReplace = false; Function *F = I.getCalledFunction(); + if (!F) + return; + std::string Name = F->getName(); for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Value *Arg = I.getArgOperand(i); diff --git 
a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index add415e..3b4c235 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -106,20 +106,27 @@ bool isReadOnlySegment(const GlobalValue *GV) {
   return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
 }
 
-static const char ShaderTypeAttribute[] = "ShaderType";
-
-unsigned getShaderType(const Function &F) {
-  Attribute A = F.getFnAttribute(ShaderTypeAttribute);
-  unsigned ShaderType = ShaderType::COMPUTE;
+static unsigned getIntegerAttribute(const Function &F, const char *Name,
+                                    unsigned Default) {
+  Attribute A = F.getFnAttribute(Name);
+  unsigned Result = Default;
 
   if (A.isStringAttribute()) {
     StringRef Str = A.getValueAsString();
-    if (Str.getAsInteger(0, ShaderType)) {
+    if (Str.getAsInteger(0, Result)) {
       LLVMContext &Ctx = F.getContext();
       Ctx.emitError("can't parse shader type");
     }
   }
-  return ShaderType;
+  return Result;
+}
+
+unsigned getShaderType(const Function &F) {
+  return getIntegerAttribute(F, "ShaderType", ShaderType::COMPUTE);
+}
+
+unsigned getInitialPSInputAddr(const Function &F) {
+  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
 }
 
 bool isSI(const MCSubtargetInfo &STI) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 19419a2..57cbe1b5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -45,6 +45,8 @@ bool isGlobalSegment(const GlobalValue *GV);
 bool isReadOnlySegment(const GlobalValue *GV);
 
 unsigned getShaderType(const Function &F);
+unsigned getInitialPSInputAddr(const Function &F);
+
 bool isSI(const MCSubtargetInfo &STI);
 bool isCI(const MCSubtargetInfo &STI);
 
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 419717c..a520770 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -87,9 +87,22 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     }
   }
 
+  if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS)
+    return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
+               ? CSR_iOS_CXX_TLS_PE_SaveList
+               : CSR_iOS_CXX_TLS_SaveList;
   return RegList;
 }
 
+const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
+    const MachineFunction *MF) const {
+  assert(MF && "Invalid MachineFunction pointer.");
+  if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+      MF->getInfo<ARMFunctionInfo>()->isSplitCSR())
+    return CSR_iOS_CXX_TLS_ViaCopy_SaveList;
+  return nullptr;
+}
+
 const uint32_t *
 ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                           CallingConv::ID CC) const {
@@ -97,6 +110,8 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
   if (CC == CallingConv::GHC)
     // This is academic because all GHC calls are (supposed to be) tail calls
    return CSR_NoRegs_RegMask;
+  if (STI.isTargetDarwin() && CC == CallingConv::CXX_FAST_TLS)
+    return CSR_iOS_CXX_TLS_RegMask;
  return STI.isTargetDarwin() ?
CSR_iOS_RegMask : CSR_AAPCS_RegMask; } @@ -106,6 +121,14 @@ ARMBaseRegisterInfo::getNoPreservedMask() const { } const uint32_t * +ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const { + assert(MF.getSubtarget<ARMSubtarget>().isTargetDarwin() && + "only know about special TLS call on Darwin"); + return CSR_iOS_TLSCall_RegMask; +} + + +const uint32_t * ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index cea8b80..6a9a45a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -62,6 +62,12 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) { switch (Reg) { case D15: case D14: case D13: case D12: case D11: case D10: case D9: case D8: + case D7: case D6: case D5: case D4: + case D3: case D2: case D1: case D0: + case D31: case D30: case D29: case D28: + case D27: case D26: case D25: case D24: + case D23: case D22: case D21: case D20: + case D19: case D18: case D17: case D16: return true; default: return false; @@ -92,9 +98,12 @@ protected: public: /// Code Generation virtual methods... const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const MCPhysReg * + getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; + const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const; /// getThisReturnPreservedMask - Returns a call preserved mask specific to the /// case that 'returned' is on an i32 first argument if the calling convention diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index 2335164..847ef87 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -225,6 +225,21 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; +def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP, + (sequence "R%u", 12, 1), + (sequence "D%u", 31, 0))>; + +// C++ TLS access function saves all registers except SP. Try to match +// the order of CSRs in CSR_iOS. +def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1), + (sequence "D%u", 31, 0))>; + +// CSRs that are handled by prologue, epilogue. +def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR)>; + +// CSRs that are handled explicitly via copies. +def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, LR)>; + // The "interrupt" attribute is used to generate code that is acceptable in // exception-handlers of various kinds. It makes us use a different return // instruction (handled elsewhere) and affects which registers we must return to diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 9bdf823c..ff2fcfa 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -578,7 +578,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. 
- if (VT != MVT::i32) return 0; + if (VT != MVT::i32 || GV->isThreadLocal()) return 0; Reloc::Model RelocM = TM.getRelocationModel(); bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); @@ -2083,6 +2083,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + // Build a list of return value registers. SmallVector<unsigned, 4> RetRegs; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 0242440..dfbb969 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -622,7 +622,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } else Base = N; @@ -801,7 +802,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -1067,7 +1069,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), @@ -1186,7 +1189,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, if (N.getOpcode() == ISD::ADD) { return false; // We want to select register offset instead } else if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } else { Base = N; @@ -1292,7 +1296,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::TargetConstantPool) return false; // We want to select t2LDRpci instead. diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 9cfb06b..37c0795 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -744,7 +744,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SUBE, MVT::i32, Custom); } - if (!Subtarget->isThumb1Only()) + if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); // ARM does not have ROTL. 
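(The ARMISelDAGToDAG.cpp hunks above repeat the same two-opcode test in five
addressing-mode selectors. A possible cleanup, sketched here rather than part
of the patch, would be a small helper so future address kinds stay in sync:)

    // Sketch: true if N is a Wrapper whose operand may be taken directly as
    // the base, i.e. neither a plain global nor a TLS global (both of which
    // need extra lowering steps).
    static bool isFoldableWrapper(const SDValue &N) {
      if (N.getOpcode() != ARMISD::Wrapper)
        return false;
      unsigned Opc = N.getOperand(0).getOpcode();
      return Opc != ISD::TargetGlobalAddress &&
             Opc != ISD::TargetGlobalTLSAddress;
    }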
@@ -1385,6 +1385,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
     else
       return CallingConv::ARM_AAPCS;
   case CallingConv::Fast:
+  case CallingConv::CXX_FAST_TLS:
     if (!Subtarget->isAAPCS_ABI()) {
       if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
         return CallingConv::Fast;
@@ -2347,6 +2348,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
     Flag = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
+  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const MCPhysReg *I =
+      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+  if (I) {
+    for (; *I; ++I) {
+      if (ARM::GPRRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::i32));
+      else if (ARM::DPRRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+      else
+        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+    }
+  }
 
   // Update chain and glue.
   RetOps[0] = Chain;
@@ -2530,6 +2544,72 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
 }
 
+/// \brief Convert a TLS address reference into the correct sequence of loads
+/// and calls to compute the variable's address for Darwin, and return an
+/// SDValue containing the final node.
+
+/// Darwin only has one TLS scheme which must be capable of dealing with the
+/// fully general situation, in the worst case. This means:
+///     + "extern __thread" declaration.
+///     + Defined in a possibly unknown dynamic library.
+///
+/// The general system is that each __thread variable has a [3 x i32] descriptor
+/// which contains information used by the runtime to calculate the address. The
+/// only part of this the compiler needs to know about is the first word, which
+/// contains a function pointer that must be called with the address of the
+/// entire descriptor in "r0".
+///
+/// Since this descriptor may be in a different unit, in general access must
+/// proceed along the usual ARM rules. A common sequence to produce is:
+///
+///     movw rT1, :lower16:_var$non_lazy_ptr
+///     movt rT1, :upper16:_var$non_lazy_ptr
+///     ldr r0, [rT1]
+///     ldr rT2, [r0]
+///     blx rT2
+///     [...address now in r0...]
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+  SDLoc DL(Op);
+
+  // First step is to get the address of the actual global symbol. This is where
+  // the TLS descriptor lives.
+  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
+
+  // The first entry in the descriptor is a function pointer that we must call
+  // to obtain the address of the variable.
+  SDValue Chain = DAG.getEntryNode();
+  SDValue FuncTLVGet =
+      DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
+                  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+                  false, true, true, 4);
+  Chain = FuncTLVGet.getValue(1);
+
+  MachineFunction &F = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = F.getFrameInfo();
+  MFI->setAdjustsStack(true);
+
+  // TLS calls preserve all registers except those that absolutely must be
+  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
+  // silly).
+  auto TRI =
+      getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
+  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
+  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
+
+  // Finally, we can make the call.
This is just a degenerate version of a + // normal AArch64 call node: r0 takes the address of the descriptor, and + // returns the address of the variable in this thread. + Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue()); + Chain = + DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), + Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32), + DAG.getRegisterMask(Mask), Chain.getValue(1)); + return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1)); +} + // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, @@ -2631,9 +2711,11 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDValue ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget->isTargetDarwin()) + return LowerGlobalTLSAddressDarwin(Op, DAG); + // TODO: implement the "local dynamic" model - assert(Subtarget->isTargetELF() && - "TLS not implemented for non-ELF targets"); + assert(Subtarget->isTargetELF() && "Only ELF implemented here"); GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); @@ -11407,7 +11489,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'J': - if (Subtarget->isThumb()) { // FIXME thumb2 + if (Subtarget->isThumb1Only()) { // This must be a constant between -255 and -1, for negated ADD // immediates. This can be used in GCC with an "n" modifier that // prints the negated value, for use with SUB instructions. It is @@ -11476,7 +11558,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'M': - if (Subtarget->isThumb()) { // FIXME thumb2 + if (Subtarget->isThumb1Only()) { // This must be a multiple of 4 between 0 and 1020, for // ADD sp + immediate. if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) @@ -12324,3 +12406,49 @@ unsigned ARMTargetLowering::getExceptionSelectorRegister( // via the personality function. return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1; } + +void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { + // Update IsSplitCSR in ARMFunctionInfo. + ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>(); + AFI->setIsSplitCSR(true); +} + +void ARMTargetLowering::insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const { + const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); + if (!IStart) + return; + + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + for (const MCPhysReg *I = IStart; *I; ++I) { + const TargetRegisterClass *RC = nullptr; + if (ARM::GPRRegClass.contains(*I)) + RC = &ARM::GPRRegClass; + else if (ARM::DPRRegClass.contains(*I)) + RC = &ARM::DPRRegClass; + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + + unsigned NewVR = MRI->createVirtualRegister(RC); + // Create copy from CSR to a virtual register. + // FIXME: this currently does not emit CFI pseudo-instructions, it works + // fine for CXX_FAST_TLS since the C++-style TLS access functions should be + // nounwind. If we want to generalize this later, we may need to emit + // CFI pseudo-instructions. 
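+  // (Context, not in the original patch: this entry/exit copying is what
+  // "split CSR" means: instead of prologue/epilogue spills, the callee-saved
+  // registers are copied into virtual registers in the entry block and
+  // copied back before every return. Clang gives CallingConv::CXX_FAST_TLS
+  // to Darwin's C++ thread_local wrapper and init functions, which are
+  // nounwind, so the FIXME above is tolerable for now.)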
+ assert(Entry->getParent()->getFunction()->hasFnAttribute( + Attribute::NoUnwind) && + "Function should be nounwind in insertCopiesSplitCSR!"); + Entry->addLiveIn(*I); + BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), + NewVR) + .addReg(*I); + + for (auto *Exit : Exits) + BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), + *I) + .addReg(NewVR); + } +} diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index b764624..96b56c3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -526,6 +526,8 @@ namespace llvm { SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG, TLSModel::Model model) const; + SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; @@ -578,6 +580,15 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals, bool isThisReturn, SDValue ThisVal) const; + bool supportSplitCSR(MachineFunction *MF) const override { + return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + } + void initializeSplitCSR(MachineBasicBlock *Entry) const override; + void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index b9de83b..c446ba3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -5398,6 +5398,27 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), Requires<[IsARM, UseMovt]>; } // isReMaterializable +// The many different faces of TLS access. +def : ARMPat<(ARMWrapper tglobaltlsaddr :$dst), + (MOVi32imm tglobaltlsaddr :$dst)>, + Requires<[IsARM, UseMovt]>; + +def : Pat<(ARMWrapper tglobaltlsaddr:$src), + (LDRLIT_ga_abs tglobaltlsaddr:$src)>, + Requires<[IsARM, DontUseMovt]>; + +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovt]>; + +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, + Requires<[IsARM, DontUseMovt]>; +let AddedComplexity = 10 in +def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)), + (MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>, + Requires<[IsARM, UseMovt]>; + + // ConstantPool, GlobalAddress, and JumpTable def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 7020ffb..defef4e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5689,7 +5689,10 @@ def : NEONInstAlias<"vmov${p} $Vd, $Vm", // VMOV : Vector Move (Immediate) -let isReMaterializable = 1 in { +// Although VMOVs are not strictly speaking cheap, they are as expensive +// as their copies counterpart (VORR), so we should prefer rematerialization +// over splitting when it applies. 
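(For context, a sketch of how the register allocator consumes the two flags
set below; the helper name and the exact TargetInstrInfo signatures are
assumptions for this era of the API, not taken from the patch:)

    // Remat is preferred over splitting/copying when both queries succeed.
    static bool preferRematOverCopy(const TargetInstrInfo &TII,
                                    const MachineInstr *MI, AliasAnalysis *AA) {
      return TII.isTriviallyReMaterializable(MI, AA) && // safe to recompute
             TII.isAsCheapAsAMove(MI);                  // no costlier than VORR
    }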
+let isReMaterializable = 1, isAsCheapAsAMove=1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), (ins nImmSplatI8:$SIMM), IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "", @@ -5744,7 +5747,7 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, "vmov", "f32", "$Vd, $SIMM", "", [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; -} // isReMaterializable +} // isReMaterializable, isAsCheapAsAMove // Add support for bytes replication feature, so it could be GAS compatible. // E.g. instructions below: diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index df6f243..5b1f9a0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1366,6 +1366,14 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src), (ARMWrapper tglobaladdr:$src))]>, Requires<[IsThumb, DontUseMovt]>; +// TLS globals +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (tLDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, + Requires<[IsThumb, DontUseMovt]>; +def : Pat<(ARMWrapper tglobaltlsaddr:$addr), + (tLDRLIT_ga_abs tglobaltlsaddr:$addr)>, + Requires<[IsThumb, DontUseMovt]>; + // JumpTable def : T1Pat<(ARMWrapperJT tjumptable:$dst), diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index d460d33..f42f456 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -3875,6 +3875,13 @@ def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), } +def : T2Pat<(ARMWrapperPIC tglobaltlsaddr :$dst), + (t2MOV_ga_pcrel tglobaltlsaddr:$dst)>, + Requires<[IsThumb2, UseMovt]>; +def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst), + (t2MOVi32imm tglobaltlsaddr:$dst)>, + Requires<[IsThumb2, UseMovt]>; + // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index 050cd1a..63e7940 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -930,10 +930,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, // and could enable the conversion to float to be removed completely. 
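For context on the patterns the next hunk tightens: fabs and fneg on a double held in two GPRs are pure bit operations on the high word, which is why BFC (bit-field clear) and EOR can implement them without touching the FP pipeline. BFC only exists from ARMv6T2 on, hence the added HasV6T2 predicates. The bit manipulation itself, as a scalar sketch:

    #include <cstdint>

    // fabs: clear the sign bit of the high word (what BFC Rh, #31, #1 computes).
    uint32_t fabsHigh(uint32_t Rh) { return Rh & 0x7FFFFFFFu; }

    // fneg: flip the sign bit (what EOR Rh, Rh, #0x80000000 computes).
    uint32_t fnegHigh(uint32_t Rh) { return Rh ^ 0x80000000u; }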
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)), (VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>, - Requires<[IsARM]>; + Requires<[IsARM, HasV6T2]>; def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)), (VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>, - Requires<[IsThumb2]>; + Requires<[IsThumb2, HasV6T2]>; def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)), (VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>, Requires<[IsARM]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index ac0330f..71ad7a4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -20,4 +20,5 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {} + PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), + IsSplitCSR(false) {} diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index d644797..68f9aec 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -118,6 +118,10 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// coalesced weights. DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights; + /// True if this function has a subset of CSRs that is handled explicitly via + /// copies. + bool IsSplitCSR; + public: ARMFunctionInfo() : isThumb(false), @@ -128,7 +132,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), NumAlignedDPRCS2Regs(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false) {} + VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {} explicit ARMFunctionInfo(MachineFunction &MF); @@ -199,6 +203,9 @@ public: bool hasITBlocks() const { return HasITBlocks; } void setHasITBlocks(bool h) { HasITBlocks = h; } + bool isSplitCSR() const { return IsSplitCSR; } + void setIsSplitCSR(bool s) { IsSplitCSR = s; } + void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) llvm_unreachable("Duplicate entries!"); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 6084f22..57577dc 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -388,6 +388,9 @@ private: size_t calculateContentSize() const; + // Reset state between object emissions + void reset() override; + public: ARMTargetELFStreamer(MCStreamer &S) : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID), @@ -415,7 +418,7 @@ public: MCCodeEmitter *Emitter, bool IsThumb) : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { - Reset(); + EHReset(); } ~ARMELFStreamer() {} @@ -579,7 +582,10 @@ private: } // Helper functions for ARM exception handling directives - void Reset(); + void EHReset(); + + // Reset state between object emissions + void reset() override; void EmitPersonalityFixup(StringRef Name); void FlushPendingOffset(); @@ -1040,6 +1046,8 @@ void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) 
{ getStreamer().emitInst(Inst, Suffix); } +void ARMTargetELFStreamer::reset() { AttributeSection = nullptr; } + void ARMELFStreamer::FinishImpl() { MCTargetStreamer &TS = *getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); @@ -1048,6 +1056,18 @@ void ARMELFStreamer::FinishImpl() { MCELFStreamer::FinishImpl(); } +void ARMELFStreamer::reset() { + MCTargetStreamer &TS = *getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); + ATS.reset(); + MappingSymbolCounter = 0; + MCELFStreamer::reset(); + // MCELFStreamer clears the assembler's e_flags. However, for + // ARM we manually set the ABI version on streamer creation, so + // do the same here. + getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); +} + inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, @@ -1094,7 +1114,7 @@ void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) { Kind)); } -void ARMELFStreamer::Reset() { +void ARMELFStreamer::EHReset() { ExTab = nullptr; FnStart = nullptr; Personality = nullptr; @@ -1164,7 +1184,7 @@ void ARMELFStreamer::emitFnEnd() { SwitchSection(&FnStart->getSection()); // Clean exception handling frame information - Reset(); + EHReset(); } void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index dad50f2..c0d10c8 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -38,6 +38,9 @@ void ARMTargetStreamer::emitCurrentConstantPool() { // finish() - write out any non-empty assembler constant pools. void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); } +// reset() - Reset any state. +void ARMTargetStreamer::reset() {} + // The remaining callbacks should be handled separately by each // streamer. void ARMTargetStreamer::emitFnStart() {} diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h index 812f983..27faac6 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h @@ -53,6 +53,11 @@ public: /// \p MBB will be correctly handled by the target. bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; + /// Disable shrink wrapping, as tBfar/BL will be used to adjust for long jumps. + bool enableShrinkWrapping(const MachineFunction &MF) const override { + return false; + } + private: /// Check if the frame lowering of \p MF needs a special fixup /// code sequence for the epilogue diff --git a/contrib/llvm/lib/Target/AVR/AVR.h b/contrib/llvm/lib/Target/AVR/AVR.h new file mode 100644 index 0000000..4c1667e --- /dev/null +++ b/contrib/llvm/lib/Target/AVR/AVR.h @@ -0,0 +1,54 @@ +//===-- AVR.h - Top-level interface for AVR representation ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// AVR back-end.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AVR_H +#define LLVM_AVR_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" + +namespace llvm { + +class AVRTargetMachine; +class FunctionPass; + +FunctionPass *createAVRISelDag(AVRTargetMachine &TM, + CodeGenOpt::Level OptLevel); +FunctionPass *createAVRExpandPseudoPass(); +FunctionPass *createAVRFrameAnalyzerPass(); +FunctionPass *createAVRDynAllocaSRPass(); +FunctionPass *createAVRBranchSelectionPass(); + +/** + * Contains the AVR backend. + */ +namespace AVR { + +enum AddressSpace { DataMemory, ProgramMemory }; + +template <typename T> bool isProgramMemoryAddress(T *V) { + return cast<PointerType>(V->getType())->getAddressSpace() == ProgramMemory; +} + +inline bool isProgramMemoryAccess(MemSDNode const *N) { + auto V = N->getMemOperand()->getValue(); + + return (V != nullptr) ? isProgramMemoryAddress(V) : false; +} + +} // end of namespace AVR + +} // end namespace llvm + +#endif // LLVM_AVR_H diff --git a/contrib/llvm/lib/Target/AVR/AVRSelectionDAGInfo.h b/contrib/llvm/lib/Target/AVR/AVRSelectionDAGInfo.h new file mode 100644 index 0000000..ee832ad --- /dev/null +++ b/contrib/llvm/lib/Target/AVR/AVRSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- AVRSelectionDAGInfo.h - AVR SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the AVR subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AVR_SELECTION_DAG_INFO_H +#define LLVM_AVR_SELECTION_DAG_INFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { +/** + * Holds information about the AVR instruction selection DAG. + */ +class AVRSelectionDAGInfo : public TargetSelectionDAGInfo { +public: +}; + +} // end namespace llvm + +#endif // LLVM_AVR_SELECTION_DAG_INFO_H diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp new file mode 100644 index 0000000..85f03e8 --- /dev/null +++ b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp @@ -0,0 +1,40 @@ +//===-- AVRTargetObjectFile.cpp - AVR Object Files ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AVRTargetObjectFile.h" + +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Support/ELF.h" + +#include "AVR.h" + +namespace llvm { +void AVRTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { + Base::Initialize(Ctx, TM); + ProgmemDataSection = + Ctx.getELFSection(".progmem.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); +} + +MCSection * +AVRTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + // Global values in flash memory are placed in the progmem.data section + // unless they already have a user assigned section. 
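Spelled out, the test the hunk below performs when placing a global (a hedged sketch against the LLVM API of this era; goesInProgmemData is an illustrative name, not something this diff defines):

    #include "llvm/IR/GlobalValue.h"

    // A global belongs in .progmem.data when its pointer type lives in the
    // AVR program-memory address space and the user assigned no section.
    static bool goesInProgmemData(const llvm::GlobalValue *GV) {
      return GV->getType()->getPointerAddressSpace() == 1 /* ProgramMemory */ &&
             !GV->hasSection();
    }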
+ if (AVR::isProgramMemoryAddress(GV) && !GV->hasSection()) + return ProgmemDataSection; + + // Otherwise, we work the same way as ELF. + return Base::SelectSectionForGlobal(GV, Kind, Mang, TM); +} +} // end of namespace llvm diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h new file mode 100644 index 0000000..bdda35b --- /dev/null +++ b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h @@ -0,0 +1,35 @@ +//===-- AVRTargetObjectFile.h - AVR Object Info -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AVR_TARGET_OBJECT_FILE_H +#define LLVM_AVR_TARGET_OBJECT_FILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { +/** + * Lowering for an AVR ELF32 object file. + */ +class AVRTargetObjectFile : public TargetLoweringObjectFileELF { + typedef TargetLoweringObjectFileELF Base; + +public: + void Initialize(MCContext &ctx, const TargetMachine &TM) override; + + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; + +private: + MCSection *ProgmemDataSection; +}; + +} // end namespace llvm + +#endif // LLVM_AVR_TARGET_OBJECT_FILE_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index e213089..4c7c039 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -190,9 +190,9 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, - MCStreamer &OutStreamer, - const MCOperand &Imm, int AlignSize) { +static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, + MCStreamer &OutStreamer, const MCOperand &Imm, + int AlignSize) { MCSymbol *Sym; int64_t Value; if (Imm.getExpr()->evaluateAsAbsolute(Value)) { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 77907b0..4d2b545 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1275,6 +1275,8 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, if (!BT.has(RD.Reg)) continue; const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); // Find a source operand that is equal to the result. 
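The `auto At = MI->isPHI() ? ...` computation introduced above, and repeated throughout the rest of this file, guards a MachineIR invariant: PHIs must form a contiguous run at the top of a block, so a COPY that replaces a PHI's value cannot legally be inserted at the PHI itself. Written once as a helper, the idiom reads (illustrative; the diff inlines the ternary at each site):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    // Pick a legal insertion point for an instruction replacing MI's value:
    // at MI for an ordinary instruction, after the PHI run otherwise.
    static llvm::MachineBasicBlock::iterator
    insertionPoint(llvm::MachineInstr *MI, llvm::MachineBasicBlock &B) {
      return MI->isPHI() ? B.getFirstNonPHI()
                         : llvm::MachineBasicBlock::iterator(MI);
    }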
for (auto &Op : MI->uses()) { @@ -1298,7 +1300,7 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, DebugLoc DL = MI->getDebugLoc(); const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); unsigned NewR = MRI.createVirtualRegister(FRC); - BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR) + BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) .addReg(RS.Reg, 0, RS.Sub); HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); BT.put(BitTracker::RegisterRef(NewR), SC); @@ -1925,7 +1927,9 @@ bool BitSimplification::genPackhl(MachineInstr *MI, MachineBasicBlock &B = *MI->getParent(); unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); DebugLoc DL = MI->getDebugLoc(); - BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR) + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); + BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR) .addReg(Rs.Reg, 0, Rs.Sub) .addReg(Rt.Reg, 0, Rt.Sub); HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); @@ -1950,9 +1954,11 @@ bool BitSimplification::genExtractHalf(MachineInstr *MI, // Prefer zxth, since zxth can go in any slot, while extractu only in // slots 2 and 3. unsigned NewR = 0; + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); if (L.Low && Opc != Hexagon::A2_zxth) { NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR) + BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR) .addReg(L.Reg, 0, L.Sub); } else if (!L.Low && Opc != Hexagon::S2_extractu) { NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); @@ -1989,7 +1995,9 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI, MachineBasicBlock &B = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - BuildMI(B, MI, DL, HII.get(COpc), NewR) + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); + BuildMI(B, At, DL, HII.get(COpc), NewR) .addReg(H.Reg, 0, H.Sub) .addReg(L.Reg, 0, L.Sub); HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); @@ -2043,7 +2051,9 @@ bool BitSimplification::genExtractLow(MachineInstr *MI, continue; unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR) + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); + auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR) .addReg(RS.Reg, 0, RS.Sub); if (NewOpc == Hexagon::A2_andir) MIB.addImm((1 << W) - 1); @@ -2076,6 +2086,8 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI)) return false; MachineBasicBlock &B = *MI->getParent(); + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); const BitTracker::BitValue &V = SC[F+BN]; @@ -2098,7 +2110,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, } if (P != UINT_MAX) { unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); - BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR) + BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR) .addReg(RR.Reg, 0, RR.Sub) .addImm(P); HBS::replaceReg(RD.Reg, NewR, MRI); @@ -2108,7 +2120,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, } else if (V.is(0) || V.is(1)) { unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); unsigned NewOpc = V.is(0) ? 
Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue; - BuildMI(B, MI, DL, HII.get(NewOpc), NewR); + BuildMI(B, At, DL, HII.get(NewOpc), NewR); HBS::replaceReg(RD.Reg, NewR, MRI); return true; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td index 87d6b35..37c2042 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -3320,6 +3320,7 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, /* u16_0Imm */ addr{15-0}))); // Store upper-half and store doubleword cannot be NV. let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + let Uses = !if (isAbs, [], [GP]); let IClass = 0b0100; let Inst{27} = 1; @@ -3425,6 +3426,7 @@ class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs> !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, /* u16_0Imm */ addr{15-0}))); + let Uses = !if (isAbs, [], [GP]); let IClass = 0b0100; let Inst{27} = 1; @@ -3736,7 +3738,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; // if ([!]Pv[.new]) Rx=mem[bhwd](##global) //===----------------------------------------------------------------------===// -let isAsmParserOnly = 1 in +let isAsmParserOnly = 1, Uses = [GP] in class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp, bits<3> MajOp> : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp new file mode 100644 index 0000000..06719cd --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp @@ -0,0 +1,60 @@ +//===--- HexagonRDF.cpp ---------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HexagonRDF.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" + +#include "llvm/CodeGen/MachineInstr.h" + +using namespace llvm; +using namespace rdf; + +bool HexagonRegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const { + if (RA == RB) + return true; + + if (TargetRegisterInfo::isVirtualRegister(RA.Reg) && + TargetRegisterInfo::isVirtualRegister(RB.Reg)) { + // Hexagon-specific cases. + if (RA.Reg == RB.Reg) { + if (RA.Sub == 0) + return true; + if (RB.Sub == 0) + return false; + } + } + + return RegisterAliasInfo::covers(RA, RB); +} + +bool HexagonRegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR) + const { + if (RRs.count(RR)) + return true; + + if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg)) { + assert(TargetRegisterInfo::isVirtualRegister(RR.Reg)); + // Check if both covering subregisters are present. + bool HasLo = RRs.count({RR.Reg, Hexagon::subreg_loreg}); + bool HasHi = RRs.count({RR.Reg, Hexagon::subreg_hireg}); + if (HasLo && HasHi) + return true; + } + + if (RR.Sub == 0) { + // Check if both covering subregisters are present. 
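The "covering" relation being refined here says when a set of register references accounts for all bits of another reference; the Hexagon-specific twist is that a 64-bit double register is covered by the pair of its 32-bit halves (subreg_loreg and subreg_hireg). A toy model with illustrative types:

    #include <set>
    #include <utility>

    using RegisterRef = std::pair<unsigned, unsigned>; // {register, subregister}
    enum : unsigned { WholeReg = 0, LoHalf = 1, HiHalf = 2 };

    // A whole register is covered directly, or by both of its halves.
    bool coversWhole(const std::set<RegisterRef> &RRs, unsigned Reg) {
      return RRs.count({Reg, WholeReg}) != 0 ||
             (RRs.count({Reg, LoHalf}) && RRs.count({Reg, HiHalf}));
    }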
+ unsigned Lo = TRI.getSubReg(RR.Reg, Hexagon::subreg_loreg); + unsigned Hi = TRI.getSubReg(RR.Reg, Hexagon::subreg_hireg); + if (RRs.count({Lo, 0}) && RRs.count({Hi, 0})) + return true; + } + + return RegisterAliasInfo::covers(RRs, RR); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h new file mode 100644 index 0000000..00c1889 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h @@ -0,0 +1,28 @@ +//===--- HexagonRDF.h -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGON_RDF_H +#define HEXAGON_RDF_H +#include "RDFGraph.h" + +namespace llvm { + class TargetRegisterInfo; +} + +namespace rdf { + struct HexagonRegisterAliasInfo : public RegisterAliasInfo { + HexagonRegisterAliasInfo(const TargetRegisterInfo &TRI) + : RegisterAliasInfo(TRI) {} + bool covers(RegisterRef RA, RegisterRef RR) const override; + bool covers(const RegisterSet &RRs, RegisterRef RR) const override; + }; +} + +#endif + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp new file mode 100644 index 0000000..3fcda984 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -0,0 +1,272 @@ +//===--- HexagonRDFOpt.cpp ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HexagonInstrInfo.h" +#include "HexagonRDF.h" +#include "HexagonSubtarget.h" +#include "RDFCopy.h" +#include "RDFDeadCode.h" +#include "RDFGraph.h" +#include "RDFLiveness.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Format.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; +using namespace rdf; + +namespace llvm { + void initializeHexagonRDFOptPass(PassRegistry&); + FunctionPass *createHexagonRDFOpt(); +} + +namespace { + cl::opt<unsigned> RDFLimit("rdf-limit", cl::init(UINT_MAX)); + unsigned RDFCount = 0; + cl::opt<bool> RDFDump("rdf-dump", cl::init(false)); + + class HexagonRDFOpt : public MachineFunctionPass { + public: + HexagonRDFOpt() : MachineFunctionPass(ID) { + initializeHexagonRDFOptPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineDominanceFrontier>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + const char *getPassName() const override { + return "Hexagon RDF optimizations"; + } + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + + private: + MachineDominatorTree *MDT; + MachineRegisterInfo *MRI; + }; + + char HexagonRDFOpt::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonRDFOpt, "rdfopt", "Hexagon RDF opt", false, 
false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) +INITIALIZE_PASS_END(HexagonRDFOpt, "rdfopt", "Hexagon RDF opt", false, false) + + +struct HexagonDCE : public DeadCodeElimination { + HexagonDCE(DataFlowGraph &G, MachineRegisterInfo &MRI) + : DeadCodeElimination(G, MRI) {} + bool rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove); + void removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum); + + bool run(); +}; + + +bool HexagonDCE::run() { + bool Collected = collect(); + if (!Collected) + return false; + + const SetVector<NodeId> &DeadNodes = getDeadNodes(); + const SetVector<NodeId> &DeadInstrs = getDeadInstrs(); + + typedef DenseMap<NodeId,NodeId> RefToInstrMap; + RefToInstrMap R2I; + SetVector<NodeId> PartlyDead; + DataFlowGraph &DFG = getDFG(); + + for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) { + for (auto TA : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Stmt>, DFG)) { + NodeAddr<StmtNode*> SA = TA; + for (NodeAddr<RefNode*> RA : SA.Addr->members(DFG)) { + R2I.insert(std::make_pair(RA.Id, SA.Id)); + if (DFG.IsDef(RA) && DeadNodes.count(RA.Id)) + if (!DeadInstrs.count(SA.Id)) + PartlyDead.insert(SA.Id); + } + } + } + + // Nodes to remove. + SetVector<NodeId> Remove = DeadInstrs; + + bool Changed = false; + for (NodeId N : PartlyDead) { + auto SA = DFG.addr<StmtNode*>(N); + if (trace()) + dbgs() << "Partly dead: " << *SA.Addr->getCode(); + Changed |= rewrite(SA, Remove); + } + + return erase(Remove) || Changed; +} + + +void HexagonDCE::removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum) { + MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + + auto getOpNum = [MI] (MachineOperand &Op) -> unsigned { + for (unsigned i = 0, n = MI->getNumOperands(); i != n; ++i) + if (&MI->getOperand(i) == &Op) + return i; + llvm_unreachable("Invalid operand"); + }; + DenseMap<NodeId,unsigned> OpMap; + NodeList Refs = IA.Addr->members(getDFG()); + for (NodeAddr<RefNode*> RA : Refs) + OpMap.insert(std::make_pair(RA.Id, getOpNum(RA.Addr->getOp()))); + + MI->RemoveOperand(OpNum); + + for (NodeAddr<RefNode*> RA : Refs) { + unsigned N = OpMap[RA.Id]; + if (N < OpNum) + RA.Addr->setRegRef(&MI->getOperand(N)); + else if (N > OpNum) + RA.Addr->setRegRef(&MI->getOperand(N-1)); + } +} + + +bool HexagonDCE::rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove) { + if (!getDFG().IsCode<NodeAttrs::Stmt>(IA)) + return false; + DataFlowGraph &DFG = getDFG(); + MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + auto &HII = static_cast<const HexagonInstrInfo&>(DFG.getTII()); + if (HII.getAddrMode(MI) != HexagonII::PostInc) + return false; + unsigned Opc = MI->getOpcode(); + unsigned OpNum, NewOpc; + switch (Opc) { + case Hexagon::L2_loadri_pi: + NewOpc = Hexagon::L2_loadri_io; + OpNum = 1; + break; + case Hexagon::L2_loadrd_pi: + NewOpc = Hexagon::L2_loadrd_io; + OpNum = 1; + break; + case Hexagon::V6_vL32b_pi: + NewOpc = Hexagon::V6_vL32b_ai; + OpNum = 1; + break; + case Hexagon::S2_storeri_pi: + NewOpc = Hexagon::S2_storeri_io; + OpNum = 0; + break; + case Hexagon::S2_storerd_pi: + NewOpc = Hexagon::S2_storerd_io; + OpNum = 0; + break; + case Hexagon::V6_vS32b_pi: + NewOpc = Hexagon::V6_vS32b_ai; + OpNum = 0; + break; + default: + return false; + } + auto IsDead = [this] (NodeAddr<DefNode*> DA) -> bool { + return getDeadNodes().count(DA.Id); + }; + NodeList Defs; + MachineOperand &Op = MI->getOperand(OpNum); + for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG)) { + if (&DA.Addr->getOp() != 
&Op) + continue; + Defs = DFG.getRelatedRefs(IA, DA); + if (!std::all_of(Defs.begin(), Defs.end(), IsDead)) + return false; + break; + } + + // Mark all nodes in Defs for removal. + for (auto D : Defs) + Remove.insert(D.Id); + + if (trace()) + dbgs() << "Rewriting: " << *MI; + MI->setDesc(HII.get(NewOpc)); + MI->getOperand(OpNum+2).setImm(0); + removeOperand(IA, OpNum); + if (trace()) + dbgs() << " to: " << *MI; + + return true; +} + + +bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) { + if (RDFLimit.getPosition()) { + if (RDFCount >= RDFLimit) + return false; + RDFCount++; + } + + MDT = &getAnalysis<MachineDominatorTree>(); + const auto &MDF = getAnalysis<MachineDominanceFrontier>(); + const auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + const auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + MRI = &MF.getRegInfo(); + + HexagonRegisterAliasInfo HAI(HRI); + TargetOperandInfo TOI(HII); + + if (RDFDump) + MF.print(dbgs() << "Before " << getPassName() << "\n", nullptr); + DataFlowGraph G(MF, HII, HRI, *MDT, MDF, HAI, TOI); + G.build(); + if (RDFDump) { + dbgs() << PrintNode<FuncNode*>(G.getFunc(), G) << '\n'; + dbgs() << MF.getName() << '\n'; + } + + bool Changed; + CopyPropagation CP(G); + CP.trace(RDFDump); + Changed = CP.run(); + if (Changed) + G.build(); + + HexagonDCE DCE(G, *MRI); + DCE.trace(RDFDump); + Changed |= DCE.run(); + + if (Changed) { + Liveness LV(*MRI, G); + LV.trace(RDFDump); + LV.computeLiveIns(); + LV.resetLiveIns(); + LV.resetKills(); + } + + if (RDFDump) + MF.print(dbgs() << "After " << getPassName() << "\n", nullptr); + return false; +} + + +FunctionPass *llvm::createHexagonRDFOpt() { + return new HexagonRDFOpt(); +} + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 61c0589..6e5f732 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -103,6 +103,8 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) Reserved.set(Hexagon::R30); Reserved.set(Hexagon::R31); Reserved.set(Hexagon::PC); + Reserved.set(Hexagon::GP); + Reserved.set(Hexagon::D14); Reserved.set(Hexagon::D15); Reserved.set(Hexagon::LC0); Reserved.set(Hexagon::LC1); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 9dccd69..34b03fb 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -26,7 +26,11 @@ using namespace llvm; -static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", + +static cl::opt<bool> EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore, + cl::init(true), cl::desc("Enable RDF-based optimizations")); + +static cl::opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", @@ -111,6 +115,7 @@ namespace llvm { FunctionPass *createHexagonOptimizeSZextends(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonPeephole(); + FunctionPass *createHexagonRDFOpt(); FunctionPass *createHexagonSplitConst32AndConst64(); FunctionPass *createHexagonSplitDoubleRegs(); FunctionPass *createHexagonStoreWidening(); @@ -262,9 +267,12 @@ void HexagonPassConfig::addPreRegAlloc() { } void HexagonPassConfig::addPostRegAlloc() { - if (getOptLevel() != 
CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { + if (EnableRDFOpt) + addPass(createHexagonRDFOpt()); if (!DisableHexagonCFGOpt) addPass(createHexagonCFGOptimizer(), false); + } } void HexagonPassConfig::addPreSched2() { diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index c2c6275..4b07ca7 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -334,21 +334,21 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, // The only relocs left should be GP relative: default: if (MCID.mayStore() || MCID.mayLoad()) { - for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; - ++ImpUses) { - if (*ImpUses == Hexagon::GP) { - switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { - case HexagonII::MemAccessSize::ByteAccess: - return fixup_Hexagon_GPREL16_0; - case HexagonII::MemAccessSize::HalfWordAccess: - return fixup_Hexagon_GPREL16_1; - case HexagonII::MemAccessSize::WordAccess: - return fixup_Hexagon_GPREL16_2; - case HexagonII::MemAccessSize::DoubleWordAccess: - return fixup_Hexagon_GPREL16_3; - default: - llvm_unreachable("unhandled fixup"); - } + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); + ImpUses && *ImpUses; ++ImpUses) { + if (*ImpUses != Hexagon::GP) + continue; + switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { + case HexagonII::MemAccessSize::ByteAccess: + return fixup_Hexagon_GPREL16_0; + case HexagonII::MemAccessSize::HalfWordAccess: + return fixup_Hexagon_GPREL16_1; + case HexagonII::MemAccessSize::WordAccess: + return fixup_Hexagon_GPREL16_2; + case HexagonII::MemAccessSize::DoubleWordAccess: + return fixup_Hexagon_GPREL16_3; + default: + llvm_unreachable("unhandled fixup"); } } } else diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 6ceb848..4e1cce3 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -95,14 +95,7 @@ unsigned HexagonResource::setWeight(unsigned s) { return (Weight); } -HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL; - -bool HexagonCVIResource::SetUp = HexagonCVIResource::setup(); - -bool HexagonCVIResource::setup() { - assert(!TUL); - TUL = new (TypeUnitsAndLanes); - +void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) { (*TUL)[HexagonII::TypeCVI_VA] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); @@ -123,13 +116,12 @@ bool HexagonCVIResource::setup() { (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); - - return true; } -HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, +HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL, + MCInstrInfo const &MCII, unsigned s, MCInst const *id) - : HexagonResource(s) { + : HexagonResource(s), TUL(TUL) { unsigned T = HexagonMCInstrInfo::getType(MCII, *id); if (TUL->count(T)) { @@ -153,6 +145,7 @@ HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI) : MCII(MCII), STI(STI) { reset(); + HexagonCVIResource::SetupTUL(&TUL, 
STI.getCPU()); } void HexagonShuffler::reset() { @@ -163,7 +156,7 @@ void HexagonShuffler::reset() { void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender, unsigned S, bool X) { - HexagonInstr PI(MCII, ID, Extender, S, X); + HexagonInstr PI(&TUL, MCII, ID, Extender, S, X); Packet.push_back(PI); } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h index 174f10f..a093f85 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" using namespace llvm; @@ -53,9 +54,11 @@ public: // HVX insn resources. class HexagonCVIResource : public HexagonResource { +public: typedef std::pair<unsigned, unsigned> UnitsAndLanes; typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes; +private: // Available HVX slots. enum { CVI_NONE = 0, @@ -65,9 +68,7 @@ class HexagonCVIResource : public HexagonResource { CVI_MPY1 = 1 << 3 }; - static bool SetUp; - static bool setup(); - static TypeUnitsAndLanes *TUL; + TypeUnitsAndLanes *TUL; // Count of adjacent slots that the insn requires to be executed. unsigned Lanes; @@ -81,7 +82,9 @@ class HexagonCVIResource : public HexagonResource { void setStore(bool f = true) { Store = f; }; public: - HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id); + HexagonCVIResource(TypeUnitsAndLanes *TUL, MCInstrInfo const &MCII, + unsigned s, MCInst const *id); + static void SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU); bool isValid() const { return (Valid); }; unsigned getLanes() const { return (Lanes); }; @@ -100,10 +103,11 @@ class HexagonInstr { bool SoloException; public: - HexagonInstr(MCInstrInfo const &MCII, MCInst const *id, + HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T, + MCInstrInfo const &MCII, MCInst const *id, MCInst const *Extender, unsigned s, bool x = false) - : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id), - SoloException(x){}; + : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id), + SoloException(x) {}; MCInst const *getDesc() const { return (ID); }; @@ -136,6 +140,8 @@ class HexagonShuffler { // Shuffling error code. unsigned Error; + HexagonCVIResource::TypeUnitsAndLanes TUL; + protected: int64_t BundleFlags; MCInstrInfo const &MCII; diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp new file mode 100644 index 0000000..c547c71 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -0,0 +1,180 @@ +//===--- RDFCopy.cpp ------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simplistic RDF-based copy propagation. 
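The transformation this file implements, in miniature: given %1 = COPY %0, a later use of %1 may be rewritten to %0 provided %0's reaching definition at the use is the same one the copy saw (the RDefMap below exists to check precisely that); once every use is rewritten, the COPY itself becomes dead. A toy linear-code version, with the reaching-def check reduced to invalidation on redefinition (illustrative only; the real pass works over the RDF graph and dominator tree):

    #include <iterator>
    #include <map>
    #include <string>
    #include <vector>

    struct Insn { std::string Def, Src; bool IsCopy; };

    void propagateCopies(std::vector<Insn> &Code) {
      std::map<std::string, std::string> CopyOf; // copy destination -> source
      for (Insn &I : Code) {
        auto F = CopyOf.find(I.Src);
        if (F != CopyOf.end())
          I.Src = F->second; // use the copy's source directly
        // A redefinition invalidates any copy that mentions the defined name.
        for (auto It = CopyOf.begin(); It != CopyOf.end();)
          It = (It->first == I.Def || It->second == I.Def) ? CopyOf.erase(It)
                                                           : std::next(It);
        if (I.IsCopy)
          CopyOf[I.Def] = I.Src;
      }
    }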
+ +#include "RDFCopy.h" +#include "RDFGraph.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/CommandLine.h" + +#include <atomic> + +#ifndef NDEBUG +static cl::opt<unsigned> CpLimit("rdf-cp-limit", cl::init(0), cl::Hidden); +static unsigned CpCount = 0; +#endif + +using namespace llvm; +using namespace rdf; + +void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, MachineInstr *MI) { + assert(MI->getOpcode() == TargetOpcode::COPY); + const MachineOperand &Op0 = MI->getOperand(0), &Op1 = MI->getOperand(1); + RegisterRef DstR = { Op0.getReg(), Op0.getSubReg() }; + RegisterRef SrcR = { Op1.getReg(), Op1.getSubReg() }; + auto FS = DefM.find(SrcR); + if (FS == DefM.end() || FS->second.empty()) + return; + Copies.push_back(SA.Id); + RDefMap[SrcR][SA.Id] = FS->second.top()->Id; + // Insert DstR into the map. + RDefMap[DstR]; +} + + +void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) { + RegisterSet RRs; + for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) + RRs.insert(RA.Addr->getRegRef()); + bool Common = false; + for (auto &R : RDefMap) { + if (!RRs.count(R.first)) + continue; + Common = true; + break; + } + if (!Common) + return; + + for (auto &R : RDefMap) { + if (!RRs.count(R.first)) + continue; + auto F = DefM.find(R.first); + if (F == DefM.end() || F->second.empty()) + continue; + R.second[IA.Id] = F->second.top()->Id; + } +} + + +bool CopyPropagation::scanBlock(MachineBasicBlock *B) { + bool Changed = false; + auto BA = DFG.getFunc().Addr->findBlock(B, DFG); + DFG.markBlock(BA.Id, DefM); + + for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) { + if (DFG.IsCode<NodeAttrs::Stmt>(IA)) { + NodeAddr<StmtNode*> SA = IA; + MachineInstr *MI = SA.Addr->getCode(); + if (MI->isCopy()) + recordCopy(SA, MI); + } + + updateMap(IA); + DFG.pushDefs(IA, DefM); + } + + MachineDomTreeNode *N = MDT.getNode(B); + for (auto I : *N) + Changed |= scanBlock(I->getBlock()); + + DFG.releaseBlock(BA.Id, DefM); + return Changed; +} + + +bool CopyPropagation::run() { + scanBlock(&DFG.getMF().front()); + + if (trace()) { + dbgs() << "Copies:\n"; + for (auto I : Copies) + dbgs() << *DFG.addr<StmtNode*>(I).Addr->getCode(); + dbgs() << "\nRDef map:\n"; + for (auto R : RDefMap) { + dbgs() << Print<RegisterRef>(R.first, DFG) << " -> {"; + for (auto &M : R.second) + dbgs() << ' ' << Print<NodeId>(M.first, DFG) << ':' + << Print<NodeId>(M.second, DFG); + dbgs() << " }\n"; + } + } + + bool Changed = false; + NodeSet Deleted; +#ifndef NDEBUG + bool HasLimit = CpLimit.getNumOccurrences() > 0; +#endif + + for (auto I : Copies) { +#ifndef NDEBUG + if (HasLimit && CpCount >= CpLimit) + break; +#endif + if (Deleted.count(I)) + continue; + auto SA = DFG.addr<InstrNode*>(I); + NodeList Ds = SA.Addr->members_if(DFG.IsDef, DFG); + if (Ds.size() != 1) + continue; + NodeAddr<DefNode*> DA = Ds[0]; + RegisterRef DR0 = DA.Addr->getRegRef(); + NodeList Us = SA.Addr->members_if(DFG.IsUse, DFG); + if (Us.size() != 1) + continue; + NodeAddr<UseNode*> UA0 = Us[0]; + RegisterRef UR0 = UA0.Addr->getRegRef(); + NodeId RD0 = UA0.Addr->getReachingDef(); + + for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) { + auto UA = DFG.addr<UseNode*>(N); + NextN = UA.Addr->getSibling(); + uint16_t F = UA.Addr->getFlags(); + if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed)) + continue; + if (UA.Addr->getRegRef() != DR0) + continue; + NodeAddr<InstrNode*> IA = UA.Addr->getOwner(DFG); + assert(DFG.IsCode<NodeAttrs::Stmt>(IA)); + 
MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + if (RDefMap[UR0][IA.Id] != RD0) + continue; + MachineOperand &Op = UA.Addr->getOp(); + if (Op.isTied()) + continue; + if (trace()) { + dbgs() << "can replace " << Print<RegisterRef>(DR0, DFG) + << " with " << Print<RegisterRef>(UR0, DFG) << " in " + << *NodeAddr<StmtNode*>(IA).Addr->getCode(); + } + + Op.setReg(UR0.Reg); + Op.setSubReg(UR0.Sub); + Changed = true; +#ifndef NDEBUG + if (HasLimit && CpCount >= CpLimit) + break; + CpCount++; +#endif + + if (MI->isCopy()) { + MachineOperand &Op0 = MI->getOperand(0), &Op1 = MI->getOperand(1); + if (Op0.getReg() == Op1.getReg() && Op0.getSubReg() == Op1.getSubReg()) + MI->eraseFromParent(); + Deleted.insert(IA.Id); + } + } + } + + return Changed; +} + diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h new file mode 100644 index 0000000..02531b9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h @@ -0,0 +1,48 @@ +//===--- RDFCopy.h --------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef RDF_COPY_H +#define RDF_COPY_H + +#include "RDFGraph.h" +#include <map> +#include <vector> + +namespace llvm { + class MachineBasicBlock; + class MachineDominatorTree; + class MachineInstr; +} + +namespace rdf { + struct CopyPropagation { + CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg), + Trace(false) {} + + bool run(); + void trace(bool On) { Trace = On; } + bool trace() const { return Trace; } + + private: + const MachineDominatorTree &MDT; + DataFlowGraph &DFG; + DataFlowGraph::DefStackMap DefM; + bool Trace; + + // map: register -> (map: stmt -> reaching def) + std::map<RegisterRef,std::map<NodeId,NodeId>> RDefMap; + std::vector<NodeId> Copies; + + void recordCopy(NodeAddr<StmtNode*> SA, MachineInstr *MI); + void updateMap(NodeAddr<InstrNode*> IA); + bool scanBlock(MachineBasicBlock *B); + }; +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp new file mode 100644 index 0000000..9566857 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp @@ -0,0 +1,204 @@ +//===--- RDFDeadCode.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// RDF-based generic dead code elimination. + +#include "RDFGraph.h" +#include "RDFLiveness.h" +#include "RDFDeadCode.h" + +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; +using namespace rdf; + +// Check if the given instruction has observable side-effects, i.e. if +// it should be considered "live". It is safe for this function to be +// overly conservative (i.e. return "true" for all instructions), but it +// is not safe to return "false" for an instruction that should not be +// considered removable. 
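The mark phase of this pass (collect(), further down) follows the textbook scheme: seed a worklist with the references of instructions that have observable effects, then repeatedly pull a node and enqueue what it depends on (a live def pulls in its instruction's uses; a live use pulls in its reaching defs) until nothing new is marked. The scheme, detached from RDF (illustrative):

    #include <set>
    #include <vector>

    // Dep[N] lists the nodes that node N depends on; Work holds the seeds.
    std::set<int> markLive(const std::vector<std::vector<int>> &Dep,
                           std::vector<int> Work) {
      std::set<int> Live;
      while (!Work.empty()) {
        int N = Work.back();
        Work.pop_back();
        if (!Live.insert(N).second)
          continue; // already marked live
        for (int M : Dep[N])
          Work.push_back(M);
      }
      return Live;
    }

Everything not marked at the fixpoint is dead.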
+bool DeadCodeElimination::isLiveInstr(const MachineInstr *MI) const { + if (MI->mayStore() || MI->isBranch() || MI->isCall() || MI->isReturn()) + return true; + if (MI->hasOrderedMemoryRef() || MI->hasUnmodeledSideEffects()) + return true; + if (MI->isPHI()) + return false; + for (auto &Op : MI->operands()) + if (Op.isReg() && MRI.isReserved(Op.getReg())) + return true; + return false; +} + +void DeadCodeElimination::scanInstr(NodeAddr<InstrNode*> IA, + SetVector<NodeId> &WorkQ) { + if (!DFG.IsCode<NodeAttrs::Stmt>(IA)) + return; + if (!isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode())) + return; + for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) { + if (!LiveNodes.count(RA.Id)) + WorkQ.insert(RA.Id); + } +} + +void DeadCodeElimination::processDef(NodeAddr<DefNode*> DA, + SetVector<NodeId> &WorkQ) { + NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG); + for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) { + if (!LiveNodes.count(UA.Id)) + WorkQ.insert(UA.Id); + } + for (NodeAddr<DefNode*> TA : DFG.getRelatedRefs(IA, DA)) + LiveNodes.insert(TA.Id); +} + +void DeadCodeElimination::processUse(NodeAddr<UseNode*> UA, + SetVector<NodeId> &WorkQ) { + for (NodeAddr<DefNode*> DA : LV.getAllReachingDefs(UA)) { + if (!LiveNodes.count(DA.Id)) + WorkQ.insert(DA.Id); + } +} + +// Traverse the DFG and collect the set dead RefNodes and the set of +// dead instructions. Return "true" if any of these sets is non-empty, +// "false" otherwise. +bool DeadCodeElimination::collect() { + // This function works by first finding all live nodes. The dead nodes + // are then the complement of the set of live nodes. + // + // Assume that all nodes are dead. Identify instructions which must be + // considered live, i.e. instructions with observable side-effects, such + // as calls and stores. All arguments of such instructions are considered + // live. For each live def, all operands used in the corresponding + // instruction are considered live. For each live use, all its reaching + // defs are considered live. + LiveNodes.clear(); + SetVector<NodeId> WorkQ; + for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) + for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) + scanInstr(IA, WorkQ); + + while (!WorkQ.empty()) { + NodeId N = *WorkQ.begin(); + WorkQ.remove(N); + LiveNodes.insert(N); + auto RA = DFG.addr<RefNode*>(N); + if (DFG.IsDef(RA)) + processDef(RA, WorkQ); + else + processUse(RA, WorkQ); + } + + if (trace()) { + dbgs() << "Live nodes:\n"; + for (NodeId N : LiveNodes) { + auto RA = DFG.addr<RefNode*>(N); + dbgs() << PrintNode<RefNode*>(RA, DFG) << "\n"; + } + } + + auto IsDead = [this] (NodeAddr<InstrNode*> IA) -> bool { + for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG)) + if (LiveNodes.count(DA.Id)) + return false; + return true; + }; + + for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) { + for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) { + for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) + if (!LiveNodes.count(RA.Id)) + DeadNodes.insert(RA.Id); + if (DFG.IsCode<NodeAttrs::Stmt>(IA)) + if (isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode())) + continue; + if (IsDead(IA)) { + DeadInstrs.insert(IA.Id); + if (trace()) + dbgs() << "Dead instr: " << PrintNode<InstrNode*>(IA, DFG) << "\n"; + } + } + } + + return !DeadNodes.empty(); +} + +// Erase the nodes given in the Nodes set from DFG. 
In addition to removing +// them from the DFG, if a node corresponds to a statement, the corresponding +// machine instruction is erased from the function. +bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) { + if (Nodes.empty()) + return false; + + // Prepare the actual set of ref nodes to remove: ref nodes from Nodes + // are included directly, for each InstrNode in Nodes, include the set + // of all RefNodes from it. + NodeList DRNs, DINs; + for (auto I : Nodes) { + auto BA = DFG.addr<NodeBase*>(I); + uint16_t Type = BA.Addr->getType(); + if (Type == NodeAttrs::Ref) { + DRNs.push_back(DFG.addr<RefNode*>(I)); + continue; + } + + // If it's a code node, add all ref nodes from it. + uint16_t Kind = BA.Addr->getKind(); + if (Kind == NodeAttrs::Stmt || Kind == NodeAttrs::Phi) { + for (auto N : NodeAddr<CodeNode*>(BA).Addr->members(DFG)) + DRNs.push_back(N); + DINs.push_back(DFG.addr<InstrNode*>(I)); + } else { + llvm_unreachable("Unexpected code node"); + return false; + } + } + + // Sort the list so that use nodes are removed first. This makes the + // "unlink" functions a bit faster. + auto UsesFirst = [] (NodeAddr<RefNode*> A, NodeAddr<RefNode*> B) -> bool { + uint16_t KindA = A.Addr->getKind(), KindB = B.Addr->getKind(); + if (KindA == NodeAttrs::Use && KindB == NodeAttrs::Def) + return true; + if (KindA == NodeAttrs::Def && KindB == NodeAttrs::Use) + return false; + return A.Id < B.Id; + }; + std::sort(DRNs.begin(), DRNs.end(), UsesFirst); + + if (trace()) + dbgs() << "Removing dead ref nodes:\n"; + for (NodeAddr<RefNode*> RA : DRNs) { + if (trace()) + dbgs() << " " << PrintNode<RefNode*>(RA, DFG) << '\n'; + if (DFG.IsUse(RA)) + DFG.unlinkUse(RA); + else if (DFG.IsDef(RA)) + DFG.unlinkDef(RA); + } + + // Now, remove all dead instruction nodes. + for (NodeAddr<InstrNode*> IA : DINs) { + NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG); + BA.Addr->removeMember(IA, DFG); + if (!DFG.IsCode<NodeAttrs::Stmt>(IA)) + continue; + + MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + if (trace()) + dbgs() << "erasing: " << *MI; + MI->eraseFromParent(); + } + return true; +} diff --git a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.h b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.h new file mode 100644 index 0000000..f4373fb --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.h @@ -0,0 +1,65 @@ +//===--- RDFDeadCode.h ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// RDF-based generic dead code elimination. +// +// The main interface of this class are functions "collect" and "erase". +// This allows custom processing of the function being optimized by a +// particular consumer. The simplest way to use this class would be to +// instantiate an object, and then simply call "collect" and "erase", +// passing the result of "getDeadInstrs()" to it. +// A more complex scenario would be to call "collect" first, then visit +// all post-increment instructions to see if the address update is dead +// or not, and if it is, convert the instruction to a non-updating form. +// After that "erase" can be called with the set of nodes including both, +// dead defs from the updating instructions and the nodes corresponding +// to the dead instructions. 
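The simple protocol described above, written out against the interface declared below (a hedged usage sketch; the HexagonDCE subclass earlier in this commit is an instance of the more complex scenario):

    #include "RDFDeadCode.h"

    // Collect dead nodes, then erase every dead instruction in one shot.
    bool runSimpleDCE(rdf::DataFlowGraph &G, llvm::MachineRegisterInfo &MRI) {
      rdf::DeadCodeElimination DCE(G, MRI);
      if (!DCE.collect())
        return false;
      return DCE.erase(DCE.getDeadInstrs());
    }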
+ +#ifndef RDF_DEADCODE_H +#define RDF_DEADCODE_H + +#include "RDFGraph.h" +#include "RDFLiveness.h" +#include "llvm/ADT/SetVector.h" + +namespace llvm { + class MachineRegisterInfo; +} + +namespace rdf { + struct DeadCodeElimination { + DeadCodeElimination(DataFlowGraph &dfg, MachineRegisterInfo &mri) + : Trace(false), DFG(dfg), MRI(mri), LV(mri, dfg) {} + + bool collect(); + bool erase(const SetVector<NodeId> &Nodes); + void trace(bool On) { Trace = On; } + bool trace() const { return Trace; } + + SetVector<NodeId> getDeadNodes() { return DeadNodes; } + SetVector<NodeId> getDeadInstrs() { return DeadInstrs; } + DataFlowGraph &getDFG() { return DFG; } + + private: + bool Trace; + SetVector<NodeId> LiveNodes; + SetVector<NodeId> DeadNodes; + SetVector<NodeId> DeadInstrs; + DataFlowGraph &DFG; + MachineRegisterInfo &MRI; + Liveness LV; + + bool isLiveInstr(const MachineInstr *MI) const; + void scanInstr(NodeAddr<InstrNode*> IA, SetVector<NodeId> &WorkQ); + void processDef(NodeAddr<DefNode*> DA, SetVector<NodeId> &WorkQ); + void processUse(NodeAddr<UseNode*> UA, SetVector<NodeId> &WorkQ); + }; +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp new file mode 100644 index 0000000..9b47422 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp @@ -0,0 +1,1716 @@ +//===--- RDFGraph.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Target-independent, SSA-based data flow graph for register data flow (RDF). +// +#include "RDFGraph.h" + +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; +using namespace rdf; + +// Printing functions. Have them here first, so that the rest of the code +// can use them. 
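One idiom is worth noting before the wall of printers: a Print<T> wrapper pairs the object being printed with the graph, so operator<< can resolve context-dependent names (register names through TRI, opcode names through TII) without global state. The pattern in miniature, independent of RDF (illustrative):

    #include <iostream>

    struct Context { const char *Names[2] = {"r0", "r1"}; };

    // Pair a value with the context needed to render it.
    template <typename T> struct Print {
      const T &Obj;
      const Context &G;
    };

    std::ostream &operator<<(std::ostream &OS, const Print<unsigned> &P) {
      return OS << P.G.Names[P.Obj]; // context-dependent rendering
    }

    int main() {
      Context C;
      std::cout << Print<unsigned>{1u, C} << '\n'; // prints "r1"
    }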
+namespace rdf { + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) { + auto &TRI = P.G.getTRI(); + if (P.Obj.Reg > 0 && P.Obj.Reg < TRI.getNumRegs()) + OS << TRI.getName(P.Obj.Reg); + else + OS << '#' << P.Obj.Reg; + if (P.Obj.Sub > 0) { + OS << ':'; + if (P.Obj.Sub < TRI.getNumSubRegIndices()) + OS << TRI.getSubRegIndexName(P.Obj.Sub); + else + OS << '#' << P.Obj.Sub; + } + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) { + auto NA = P.G.addr<NodeBase*>(P.Obj); + uint16_t Attrs = NA.Addr->getAttrs(); + uint16_t Kind = NodeAttrs::kind(Attrs); + uint16_t Flags = NodeAttrs::flags(Attrs); + switch (NodeAttrs::type(Attrs)) { + case NodeAttrs::Code: + switch (Kind) { + case NodeAttrs::Func: OS << 'f'; break; + case NodeAttrs::Block: OS << 'b'; break; + case NodeAttrs::Stmt: OS << 's'; break; + case NodeAttrs::Phi: OS << 'p'; break; + default: OS << "c?"; break; + } + break; + case NodeAttrs::Ref: + if (Flags & NodeAttrs::Preserving) + OS << '+'; + if (Flags & NodeAttrs::Clobbering) + OS << '~'; + switch (Kind) { + case NodeAttrs::Use: OS << 'u'; break; + case NodeAttrs::Def: OS << 'd'; break; + case NodeAttrs::Block: OS << 'b'; break; + default: OS << "r?"; break; + } + break; + default: + OS << '?'; + break; + } + OS << P.Obj; + if (Flags & NodeAttrs::Shadow) + OS << '"'; + return OS; +} + +namespace { + void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA, + const DataFlowGraph &G) { + OS << Print<NodeId>(RA.Id, G) << '<' + << Print<RegisterRef>(RA.Addr->getRegRef(), G) << '>'; + if (RA.Addr->getFlags() & NodeAttrs::Fixed) + OS << '!'; + } +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) { + printRefHeader(OS, P.Obj, P.G); + OS << '('; + if (NodeId N = P.Obj.Addr->getReachingDef()) + OS << Print<NodeId>(N, P.G); + OS << ','; + if (NodeId N = P.Obj.Addr->getReachedDef()) + OS << Print<NodeId>(N, P.G); + OS << ','; + if (NodeId N = P.Obj.Addr->getReachedUse()) + OS << Print<NodeId>(N, P.G); + OS << "):"; + if (NodeId N = P.Obj.Addr->getSibling()) + OS << Print<NodeId>(N, P.G); + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) { + printRefHeader(OS, P.Obj, P.G); + OS << '('; + if (NodeId N = P.Obj.Addr->getReachingDef()) + OS << Print<NodeId>(N, P.G); + OS << "):"; + if (NodeId N = P.Obj.Addr->getSibling()) + OS << Print<NodeId>(N, P.G); + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<NodeAddr<PhiUseNode*>> &P) { + printRefHeader(OS, P.Obj, P.G); + OS << '('; + if (NodeId N = P.Obj.Addr->getReachingDef()) + OS << Print<NodeId>(N, P.G); + OS << ','; + if (NodeId N = P.Obj.Addr->getPredecessor()) + OS << Print<NodeId>(N, P.G); + OS << "):"; + if (NodeId N = P.Obj.Addr->getSibling()) + OS << Print<NodeId>(N, P.G); + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) { + switch (P.Obj.Addr->getKind()) { + case NodeAttrs::Def: + OS << PrintNode<DefNode*>(P.Obj, P.G); + break; + case NodeAttrs::Use: + if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef) + OS << PrintNode<PhiUseNode*>(P.Obj, P.G); + else + OS << PrintNode<UseNode*>(P.Obj, P.G); + break; + } + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) { + unsigned N = P.Obj.size(); + for (auto I : P.Obj) { + OS << Print<NodeId>(I.Id, P.G); + if (--N) + OS << ' '; + } + return OS; +} + +template<> +raw_ostream 
&operator<< (raw_ostream &OS, const Print<NodeSet> &P) { + unsigned N = P.Obj.size(); + for (auto I : P.Obj) { + OS << Print<NodeId>(I, P.G); + if (--N) + OS << ' '; + } + return OS; +} + +namespace { + template <typename T> + struct PrintListV { + PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {} + typedef T Type; + const NodeList &List; + const DataFlowGraph &G; + }; + + template <typename T> + raw_ostream &operator<< (raw_ostream &OS, const PrintListV<T> &P) { + unsigned N = P.List.size(); + for (NodeAddr<T> A : P.List) { + OS << PrintNode<T>(A, P.G); + if (--N) + OS << ", "; + } + return OS; + } +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) { + OS << Print<NodeId>(P.Obj.Id, P.G) << ": phi [" + << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']'; + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<NodeAddr<StmtNode*>> &P) { + unsigned Opc = P.Obj.Addr->getCode()->getOpcode(); + OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc) + << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']'; + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<NodeAddr<InstrNode*>> &P) { + switch (P.Obj.Addr->getKind()) { + case NodeAttrs::Phi: + OS << PrintNode<PhiNode*>(P.Obj, P.G); + break; + case NodeAttrs::Stmt: + OS << PrintNode<StmtNode*>(P.Obj, P.G); + break; + default: + OS << "instr? " << Print<NodeId>(P.Obj.Id, P.G); + break; + } + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<NodeAddr<BlockNode*>> &P) { + auto *BB = P.Obj.Addr->getCode(); + unsigned NP = BB->pred_size(); + std::vector<int> Ns; + auto PrintBBs = [&OS,&P] (std::vector<int> Ns) -> void { + unsigned N = Ns.size(); + for (auto I : Ns) { + OS << "BB#" << I; + if (--N) + OS << ", "; + } + }; + + OS << Print<NodeId>(P.Obj.Id, P.G) << ": === BB#" << BB->getNumber() + << " === preds(" << NP << "): "; + for (auto I : BB->predecessors()) + Ns.push_back(I->getNumber()); + PrintBBs(Ns); + + unsigned NS = BB->succ_size(); + OS << " succs(" << NS << "): "; + Ns.clear(); + for (auto I : BB->successors()) + Ns.push_back(I->getNumber()); + PrintBBs(Ns); + OS << '\n'; + + for (auto I : P.Obj.Addr->members(P.G)) + OS << PrintNode<InstrNode*>(I, P.G) << '\n'; + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<NodeAddr<FuncNode*>> &P) { + OS << "DFG dump:[\n" << Print<NodeId>(P.Obj.Id, P.G) << ": Function: " + << P.Obj.Addr->getCode()->getName() << '\n'; + for (auto I : P.Obj.Addr->members(P.G)) + OS << PrintNode<BlockNode*>(I, P.G) << '\n'; + OS << "]\n"; + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) { + OS << '{'; + for (auto I : P.Obj) + OS << ' ' << Print<RegisterRef>(I, P.G); + OS << " }"; + return OS; +} + +template<> +raw_ostream &operator<< (raw_ostream &OS, + const Print<DataFlowGraph::DefStack> &P) { + for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) { + OS << Print<NodeId>(I->Id, P.G) + << '<' << Print<RegisterRef>(I->Addr->getRegRef(), P.G) << '>'; + I.down(); + if (I != E) + OS << ' '; + } + return OS; +} + +} // namespace rdf + +// Node allocation functions. +// +// Node allocator is like a slab memory allocator: it allocates blocks of +// memory in sizes that are multiples of the size of a node. Each block has +// the same size. 
Nodes are allocated from the currently active block, and
+// when it becomes full, a new one is created.
+// The mapping scheme between a node id and its location within a block is
+// described in the header file.
+//
+void NodeAllocator::startNewBlock() {
+  void *T = MemPool.Allocate(NodesPerBlock*NodeMemSize, NodeMemSize);
+  char *P = static_cast<char*>(T);
+  Blocks.push_back(P);
+  // Check if the block index is still within the allowed range, i.e. less
+  // than 2^N, where N is the number of bits in NodeId for the block index.
+  // BitsPerIndex is the number of bits per node index.
+  assert((Blocks.size() < (1U << (8*sizeof(NodeId)-BitsPerIndex))) &&
+         "Out of bits for block index");
+  ActiveEnd = P;
+}
+
+bool NodeAllocator::needNewBlock() {
+  if (Blocks.empty())
+    return true;
+
+  char *ActiveBegin = Blocks.back();
+  uint32_t Index = (ActiveEnd-ActiveBegin)/NodeMemSize;
+  return Index >= NodesPerBlock;
+}
+
+NodeAddr<NodeBase*> NodeAllocator::New() {
+  if (needNewBlock())
+    startNewBlock();
+
+  uint32_t ActiveB = Blocks.size()-1;
+  uint32_t Index = (ActiveEnd - Blocks[ActiveB])/NodeMemSize;
+  NodeAddr<NodeBase*> NA = { reinterpret_cast<NodeBase*>(ActiveEnd),
+                             makeId(ActiveB, Index) };
+  ActiveEnd += NodeMemSize;
+  return NA;
+}
+
+NodeId NodeAllocator::id(const NodeBase *P) const {
+  uintptr_t A = reinterpret_cast<uintptr_t>(P);
+  for (unsigned i = 0, n = Blocks.size(); i != n; ++i) {
+    uintptr_t B = reinterpret_cast<uintptr_t>(Blocks[i]);
+    if (A < B || A >= B + NodesPerBlock*NodeMemSize)
+      continue;
+    uint32_t Idx = (A-B)/NodeMemSize;
+    return makeId(i, Idx);
+  }
+  llvm_unreachable("Invalid node address");
+}
+
+void NodeAllocator::clear() {
+  MemPool.Reset();
+  Blocks.clear();
+  ActiveEnd = nullptr;
+}
+
+
+// Insert node NA after "this" in the circular chain.
+void NodeBase::append(NodeAddr<NodeBase*> NA) {
+  NodeId Nx = Next;
+  // If NA is already "next", do nothing.
+  if (Next != NA.Id) {
+    Next = NA.Id;
+    NA.Addr->Next = Nx;
+  }
+}
+
+
+// Fundamental node manipulator functions.
+
+// Obtain the register reference from a reference node.
+RegisterRef RefNode::getRegRef() const {
+  assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+  if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
+    return Ref.RR;
+  assert(Ref.Op != nullptr);
+  return { Ref.Op->getReg(), Ref.Op->getSubReg() };
+}
+
+// Set the register reference in the reference node directly (for references
+// in phi nodes).
+void RefNode::setRegRef(RegisterRef RR) {
+  assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+  assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef);
+  Ref.RR = RR;
+}
+
+// Set the register reference in the reference node based on a machine
+// operand (for references in statement nodes).
+void RefNode::setRegRef(MachineOperand *Op) {
+  assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+  assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef));
+  Ref.Op = Op;
+}
+
+// Get the owner of a given reference node.
+NodeAddr<NodeBase*> RefNode::getOwner(const DataFlowGraph &G) {
+  NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
+
+  while (NA.Addr != this) {
+    if (NA.Addr->getType() == NodeAttrs::Code)
+      return NA;
+    NA = G.addr<NodeBase*>(NA.Addr->getNext());
+  }
+  llvm_unreachable("No owner in circular list");
+}
+
+// Connect the def node to the reaching def node.
+void DefNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
+  Ref.RD = DA.Id;
+  Ref.Sib = DA.Addr->getReachedDef();
+  DA.Addr->setReachedDef(Self);
+}
+
+// Connect the use node to the reaching def node.
+void UseNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) { + Ref.RD = DA.Id; + Ref.Sib = DA.Addr->getReachedUse(); + DA.Addr->setReachedUse(Self); +} + +// Get the first member of the code node. +NodeAddr<NodeBase*> CodeNode::getFirstMember(const DataFlowGraph &G) const { + if (Code.FirstM == 0) + return NodeAddr<NodeBase*>(); + return G.addr<NodeBase*>(Code.FirstM); +} + +// Get the last member of the code node. +NodeAddr<NodeBase*> CodeNode::getLastMember(const DataFlowGraph &G) const { + if (Code.LastM == 0) + return NodeAddr<NodeBase*>(); + return G.addr<NodeBase*>(Code.LastM); +} + +// Add node NA at the end of the member list of the given code node. +void CodeNode::addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) { + auto ML = getLastMember(G); + if (ML.Id != 0) { + ML.Addr->append(NA); + } else { + Code.FirstM = NA.Id; + NodeId Self = G.id(this); + NA.Addr->setNext(Self); + } + Code.LastM = NA.Id; +} + +// Add node NA after member node MA in the given code node. +void CodeNode::addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA, + const DataFlowGraph &G) { + MA.Addr->append(NA); + if (Code.LastM == MA.Id) + Code.LastM = NA.Id; +} + +// Remove member node NA from the given code node. +void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) { + auto MA = getFirstMember(G); + assert(MA.Id != 0); + + // Special handling if the member to remove is the first member. + if (MA.Id == NA.Id) { + if (Code.LastM == MA.Id) { + // If it is the only member, set both first and last to 0. + Code.FirstM = Code.LastM = 0; + } else { + // Otherwise, advance the first member. + Code.FirstM = MA.Addr->getNext(); + } + return; + } + + while (MA.Addr != this) { + NodeId MX = MA.Addr->getNext(); + if (MX == NA.Id) { + MA.Addr->setNext(NA.Addr->getNext()); + // If the member to remove happens to be the last one, update the + // LastM indicator. + if (Code.LastM == NA.Id) + Code.LastM = MA.Id; + return; + } + MA = G.addr<NodeBase*>(MX); + } + llvm_unreachable("No such member"); +} + +// Return the list of all members of the code node. +NodeList CodeNode::members(const DataFlowGraph &G) const { + static auto True = [] (NodeAddr<NodeBase*>) -> bool { return true; }; + return members_if(True, G); +} + +// Return the owner of the given instr node. +NodeAddr<NodeBase*> InstrNode::getOwner(const DataFlowGraph &G) { + NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext()); + + while (NA.Addr != this) { + assert(NA.Addr->getType() == NodeAttrs::Code); + if (NA.Addr->getKind() == NodeAttrs::Block) + return NA; + NA = G.addr<NodeBase*>(NA.Addr->getNext()); + } + llvm_unreachable("No owner in circular list"); +} + +// Add the phi node PA to the given block node. +void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) { + auto M = getFirstMember(G); + if (M.Id == 0) { + addMember(PA, G); + return; + } + + assert(M.Addr->getType() == NodeAttrs::Code); + if (M.Addr->getKind() == NodeAttrs::Stmt) { + // If the first member of the block is a statement, insert the phi as + // the first member. + Code.FirstM = PA.Id; + PA.Addr->setNext(M.Id); + } else { + // If the first member is a phi, find the last phi, and append PA to it. + assert(M.Addr->getKind() == NodeAttrs::Phi); + NodeAddr<NodeBase*> MN = M; + do { + M = MN; + MN = G.addr<NodeBase*>(M.Addr->getNext()); + assert(MN.Addr->getType() == NodeAttrs::Code); + } while (MN.Addr->getKind() == NodeAttrs::Phi); + + // M is the last phi. 
+ addMemberAfter(M, PA, G); + } +} + +// Find the block node corresponding to the machine basic block BB in the +// given func node. +NodeAddr<BlockNode*> FuncNode::findBlock(const MachineBasicBlock *BB, + const DataFlowGraph &G) const { + auto EqBB = [BB] (NodeAddr<NodeBase*> NA) -> bool { + return NodeAddr<BlockNode*>(NA).Addr->getCode() == BB; + }; + NodeList Ms = members_if(EqBB, G); + if (!Ms.empty()) + return Ms[0]; + return NodeAddr<BlockNode*>(); +} + +// Get the block node for the entry block in the given function. +NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) { + MachineBasicBlock *EntryB = &getCode()->front(); + return findBlock(EntryB, G); +} + + +// Register aliasing information. +// +// In theory, the lane information could be used to determine register +// covering (and aliasing), but depending on the sub-register structure, +// the lane mask information may be missing. The covering information +// must be available for this framework to work, so relying solely on +// the lane data is not sufficient. + +// Determine whether RA covers RB. +bool RegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const { + if (RA == RB) + return true; + if (TargetRegisterInfo::isVirtualRegister(RA.Reg)) { + assert(TargetRegisterInfo::isVirtualRegister(RB.Reg)); + if (RA.Reg != RB.Reg) + return false; + if (RA.Sub == 0) + return true; + return TRI.composeSubRegIndices(RA.Sub, RB.Sub) == RA.Sub; + } + + assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg) && + TargetRegisterInfo::isPhysicalRegister(RB.Reg)); + unsigned A = RA.Sub != 0 ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg; + unsigned B = RB.Sub != 0 ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg; + return TRI.isSubRegister(A, B); +} + +// Determine whether RR is covered by the set of references RRs. +bool RegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR) const { + if (RRs.count(RR)) + return true; + + // For virtual registers, we cannot accurately determine covering based + // on subregisters. If RR itself is not present in RRs, but it has a sub- + // register reference, check for the super-register alone. Otherwise, + // assume non-covering. + if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) { + if (RR.Sub != 0) + return RRs.count({RR.Reg, 0}); + return false; + } + + // If any super-register of RR is present, then RR is covered. + unsigned Reg = RR.Sub == 0 ? RR.Reg : TRI.getSubReg(RR.Reg, RR.Sub); + for (MCSuperRegIterator SR(Reg, &TRI); SR.isValid(); ++SR) + if (RRs.count({*SR, 0})) + return true; + + return false; +} + +// Get the list of references aliased to RR. +std::vector<RegisterRef> RegisterAliasInfo::getAliasSet(RegisterRef RR) const { + // Do not include RR in the alias set. For virtual registers return an + // empty set. + std::vector<RegisterRef> AS; + if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return AS; + assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg)); + unsigned R = RR.Reg; + if (RR.Sub) + R = TRI.getSubReg(RR.Reg, RR.Sub); + + for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) + AS.push_back(RegisterRef({*AI, 0})); + return AS; +} + +// Check whether RA and RB are aliased. 
+bool RegisterAliasInfo::alias(RegisterRef RA, RegisterRef RB) const {
+  bool VirtA = TargetRegisterInfo::isVirtualRegister(RA.Reg);
+  bool VirtB = TargetRegisterInfo::isVirtualRegister(RB.Reg);
+  bool PhysA = TargetRegisterInfo::isPhysicalRegister(RA.Reg);
+  bool PhysB = TargetRegisterInfo::isPhysicalRegister(RB.Reg);
+
+  if (VirtA != VirtB)
+    return false;
+
+  if (VirtA) {
+    if (RA.Reg != RB.Reg)
+      return false;
+    // RA and RB refer to the same register. If either of them refers to the
+    // whole register, they must be aliased.
+    if (RA.Sub == 0 || RB.Sub == 0)
+      return true;
+    unsigned SA = TRI.getSubRegIdxSize(RA.Sub);
+    unsigned OA = TRI.getSubRegIdxOffset(RA.Sub);
+    unsigned SB = TRI.getSubRegIdxSize(RB.Sub);
+    unsigned OB = TRI.getSubRegIdxOffset(RB.Sub);
+    if (OA <= OB && OA+SA > OB)
+      return true;
+    if (OB <= OA && OB+SB > OA)
+      return true;
+    return false;
+  }
+
+  assert(PhysA && PhysB);
+  (void)PhysA, (void)PhysB;
+  unsigned A = RA.Sub ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg;
+  unsigned B = RB.Sub ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg;
+  for (MCRegAliasIterator I(A, &TRI, true); I.isValid(); ++I)
+    if (B == *I)
+      return true;
+  return false;
+}
+
+
+// Target operand information.
+//
+
+// For a given instruction, check if there are any bits of the register
+// defined in operand OpNum that can remain unchanged across this def.
+bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum)
+      const {
+  return TII.isPredicated(&In);
+}
+
+// Check if the def in operand OpNum produces an unspecified value.
+bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
+      const {
+  if (In.isCall())
+    if (In.getOperand(OpNum).isImplicit())
+      return true;
+  return false;
+}
+
+// Check if the given instruction requires operand OpNum to be a specific,
+// fixed physical register.
+bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
+      const {
+  if (In.isCall() || In.isReturn())
+    return true;
+  const MCInstrDesc &D = In.getDesc();
+  if (!D.getImplicitDefs() && !D.getImplicitUses())
+    return false;
+  const MachineOperand &Op = In.getOperand(OpNum);
+  // If there is a sub-register, treat the operand as non-fixed. Currently,
+  // fixed registers are those that are listed in the descriptor as implicit
+  // uses or defs, and those lists do not allow sub-registers.
+  if (Op.getSubReg() != 0)
+    return false;
+  unsigned Reg = Op.getReg();
+  const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs()
+                                     : D.getImplicitUses();
+  if (!ImpR)
+    return false;
+  while (*ImpR)
+    if (*ImpR++ == Reg)
+      return true;
+  return false;
+}
+
+
+//
+// The data flow graph construction.
+//
+
+DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+      const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
+      const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai,
+      const TargetOperandInfo &toi)
+    : TimeG("rdf"), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), RAI(rai),
+      TOI(toi) {
+}
+
+
+// The implementation of the definition stack.
+// Each register reference has its own definition stack. In particular,
+// the register references "Reg" and "Reg:subreg" will each have their
+// own definition stacks.
+
+// Construct a stack iterator.
+DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S,
+      bool Top) : DS(S) {
+  if (!Top) {
+    // Initialize to bottom.
+    Pos = 0;
+    return;
+  }
+  // Initialize to the top, i.e. top-most non-delimiter (or 0, if empty).
+  Pos = DS.Stack.size();
+  while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos-1]))
+    Pos--;
+}
+
+// Return the size of the stack, including block delimiters.
+unsigned DataFlowGraph::DefStack::size() const {
+  unsigned S = 0;
+  for (auto I = top(), E = bottom(); I != E; I.down())
+    S++;
+  return S;
+}
+
+// Remove the top entry from the stack. Remove all intervening delimiters
+// so that after this, the stack is either empty, or the top of the stack
+// is a non-delimiter.
+void DataFlowGraph::DefStack::pop() {
+  assert(!empty());
+  unsigned P = nextDown(Stack.size());
+  Stack.resize(P);
+}
+
+// Push a delimiter for block node N on the stack.
+void DataFlowGraph::DefStack::start_block(NodeId N) {
+  assert(N != 0);
+  Stack.push_back(NodeAddr<DefNode*>(nullptr, N));
+}
+
+// Remove all nodes from the top of the stack, until the delimiter for
+// block node N is encountered. Remove the delimiter as well. In effect,
+// this will remove from the stack all definitions from block N.
+void DataFlowGraph::DefStack::clear_block(NodeId N) {
+  assert(N != 0);
+  unsigned P = Stack.size();
+  while (P > 0) {
+    bool Found = isDelimiter(Stack[P-1], N);
+    P--;
+    if (Found)
+      break;
+  }
+  // This will also remove the delimiter, if found.
+  Stack.resize(P);
+}
+
+// Move the stack iterator up by one.
+unsigned DataFlowGraph::DefStack::nextUp(unsigned P) const {
+  // Get the next valid position after P (skipping all delimiters).
+  // The input position P does not have to point to a non-delimiter.
+  unsigned SS = Stack.size();
+  bool IsDelim;
+  assert(P < SS);
+  do {
+    P++;
+    IsDelim = isDelimiter(Stack[P-1]);
+  } while (P < SS && IsDelim);
+  assert(!IsDelim);
+  return P;
+}
+
+// Move the stack iterator down by one.
+unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
+  // Get the preceding valid position before P (skipping all delimiters).
+  // The input position P does not have to point to a non-delimiter.
+  assert(P > 0 && P <= Stack.size());
+  bool IsDelim = isDelimiter(Stack[P-1]);
+  do {
+    if (--P == 0)
+      break;
+    IsDelim = isDelimiter(Stack[P-1]);
+  } while (P > 0 && IsDelim);
+  assert(!IsDelim);
+  return P;
+}
+
+// Node management functions.
+
+// Get the pointer to the node with the id N.
+NodeBase *DataFlowGraph::ptr(NodeId N) const {
+  if (N == 0)
+    return nullptr;
+  return Memory.ptr(N);
+}
+
+// Get the id of the node at the address P.
+NodeId DataFlowGraph::id(const NodeBase *P) const {
+  if (P == nullptr)
+    return 0;
+  return Memory.id(P);
+}
+
+// Allocate a new node and set the attributes to Attrs.
+NodeAddr<NodeBase*> DataFlowGraph::newNode(uint16_t Attrs) {
+  NodeAddr<NodeBase*> P = Memory.New();
+  P.Addr->init();
+  P.Addr->setAttrs(Attrs);
+  return P;
+}
+
+// Make a copy of the given node B, except for the data-flow links, which
+// are set to 0.
+NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
+  NodeAddr<NodeBase*> NA = newNode(0);
+  memcpy(NA.Addr, B.Addr, sizeof(NodeBase));
+  // Ref nodes need to have the data-flow links reset.
+  if (NA.Addr->getType() == NodeAttrs::Ref) {
+    NodeAddr<RefNode*> RA = NA;
+    RA.Addr->setReachingDef(0);
+    RA.Addr->setSibling(0);
+    if (NA.Addr->getKind() == NodeAttrs::Def) {
+      NodeAddr<DefNode*> DA = NA;
+      DA.Addr->setReachedDef(0);
+      DA.Addr->setReachedUse(0);
+    }
+  }
+  return NA;
+}
+
+
+// Allocation routines for specific node types/kinds.
+ +NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner, + MachineOperand &Op, uint16_t Flags) { + NodeAddr<UseNode*> UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags); + UA.Addr->setRegRef(&Op); + return UA; +} + +NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner, + RegisterRef RR, NodeAddr<BlockNode*> PredB, uint16_t Flags) { + NodeAddr<PhiUseNode*> PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags); + assert(Flags & NodeAttrs::PhiRef); + PUA.Addr->setRegRef(RR); + PUA.Addr->setPredecessor(PredB.Id); + return PUA; +} + +NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner, + MachineOperand &Op, uint16_t Flags) { + NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags); + DA.Addr->setRegRef(&Op); + return DA; +} + +NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner, + RegisterRef RR, uint16_t Flags) { + NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags); + assert(Flags & NodeAttrs::PhiRef); + DA.Addr->setRegRef(RR); + return DA; +} + +NodeAddr<PhiNode*> DataFlowGraph::newPhi(NodeAddr<BlockNode*> Owner) { + NodeAddr<PhiNode*> PA = newNode(NodeAttrs::Code | NodeAttrs::Phi); + Owner.Addr->addPhi(PA, *this); + return PA; +} + +NodeAddr<StmtNode*> DataFlowGraph::newStmt(NodeAddr<BlockNode*> Owner, + MachineInstr *MI) { + NodeAddr<StmtNode*> SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt); + SA.Addr->setCode(MI); + Owner.Addr->addMember(SA, *this); + return SA; +} + +NodeAddr<BlockNode*> DataFlowGraph::newBlock(NodeAddr<FuncNode*> Owner, + MachineBasicBlock *BB) { + NodeAddr<BlockNode*> BA = newNode(NodeAttrs::Code | NodeAttrs::Block); + BA.Addr->setCode(BB); + Owner.Addr->addMember(BA, *this); + return BA; +} + +NodeAddr<FuncNode*> DataFlowGraph::newFunc(MachineFunction *MF) { + NodeAddr<FuncNode*> FA = newNode(NodeAttrs::Code | NodeAttrs::Func); + FA.Addr->setCode(MF); + return FA; +} + +// Build the data flow graph. +void DataFlowGraph::build() { + reset(); + Func = newFunc(&MF); + + if (MF.empty()) + return; + + for (auto &B : MF) { + auto BA = newBlock(Func, &B); + for (auto &I : B) { + if (I.isDebugValue()) + continue; + buildStmt(BA, I); + } + } + + // Collect information about block references. + NodeAddr<BlockNode*> EA = Func.Addr->getEntryBlock(*this); + BlockRefsMap RefM; + buildBlockRefs(EA, RefM); + + // Add function-entry phi nodes. + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { + NodeAddr<PhiNode*> PA = newPhi(EA); + RegisterRef RR = { I->first, 0 }; + uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving; + NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags); + PA.Addr->addMember(DA, *this); + } + + // Build a map "PhiM" which will contain, for each block, the set + // of references that will require phi definitions in that block. + BlockRefsMap PhiM; + auto Blocks = Func.Addr->members(*this); + for (NodeAddr<BlockNode*> BA : Blocks) + recordDefsForDF(PhiM, RefM, BA); + for (NodeAddr<BlockNode*> BA : Blocks) + buildPhis(PhiM, RefM, BA); + + // Link all the refs. This will recursively traverse the dominator tree. + DefStackMap DM; + linkBlockRefs(DM, EA); + + // Finally, remove all unused phi nodes. + removeUnusedPhis(); +} + +// For each stack in the map DefM, push the delimiter for block B on it. +void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) { + // Push block delimiters. 
+ for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I) + I->second.start_block(B); +} + +// Remove all definitions coming from block B from each stack in DefM. +void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) { + // Pop all defs from this block from the definition stack. Defs that were + // added to the map during the traversal of instructions will not have a + // delimiter, but for those, the whole stack will be emptied. + for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I) + I->second.clear_block(B); + + // Finally, remove empty stacks from the map. + for (auto I = DefM.begin(), E = DefM.end(), NextI = I; I != E; I = NextI) { + NextI = std::next(I); + // This preserves the validity of iterators other than I. + if (I->second.empty()) + DefM.erase(I); + } +} + +// Push all definitions from the instruction node IA to an appropriate +// stack in DefM. +void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) { + NodeList Defs = IA.Addr->members_if(IsDef, *this); + NodeSet Visited; +#ifndef NDEBUG + RegisterSet Defined; +#endif + + // The important objectives of this function are: + // - to be able to handle instructions both while the graph is being + // constructed, and after the graph has been constructed, and + // - maintain proper ordering of definitions on the stack for each + // register reference: + // - if there are two or more related defs in IA (i.e. coming from + // the same machine operand), then only push one def on the stack, + // - if there are multiple unrelated defs of non-overlapping + // subregisters of S, then the stack for S will have both (in an + // unspecified order), but the order does not matter from the data- + // -flow perspective. + + for (NodeAddr<DefNode*> DA : Defs) { + if (Visited.count(DA.Id)) + continue; + NodeList Rel = getRelatedRefs(IA, DA); + NodeAddr<DefNode*> PDA = Rel.front(); + // Push the definition on the stack for the register and all aliases. + RegisterRef RR = PDA.Addr->getRegRef(); +#ifndef NDEBUG + // Assert if the register is defined in two or more unrelated defs. + // This could happen if there are two or more def operands defining it. + if (!Defined.insert(RR).second) { + auto *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); + dbgs() << "Multiple definitions of register: " + << Print<RegisterRef>(RR, *this) << " in\n " << *MI + << "in BB#" << MI->getParent()->getNumber() << '\n'; + llvm_unreachable(nullptr); + } +#endif + DefM[RR].push(DA); + for (auto A : RAI.getAliasSet(RR)) { + assert(A != RR); + DefM[A].push(DA); + } + // Mark all the related defs as visited. + for (auto T : Rel) + Visited.insert(T.Id); + } +} + +// Return the list of all reference nodes related to RA, including RA itself. +// See "getNextRelated" for the meaning of a "related reference". +NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA) const { + assert(IA.Id != 0 && RA.Id != 0); + + NodeList Refs; + NodeId Start = RA.Id; + do { + Refs.push_back(RA); + RA = getNextRelated(IA, RA); + } while (RA.Id != 0 && RA.Id != Start); + return Refs; +} + + +// Clear all information in the graph. +void DataFlowGraph::reset() { + Memory.clear(); + Func = NodeAddr<FuncNode*>(); +} + + +// Return the next reference node in the instruction node IA that is related +// to RA. Conceptually, two reference nodes are related if they refer to the +// same instance of a register access, but differ in flags or other minor +// characteristics. Specific examples of related nodes are shadow reference +// nodes. 
+// Return the equivalent of nullptr if there are no more related references.
+NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
+      NodeAddr<RefNode*> RA) const {
+  assert(IA.Id != 0 && RA.Id != 0);
+
+  auto Related = [RA](NodeAddr<RefNode*> TA) -> bool {
+    if (TA.Addr->getKind() != RA.Addr->getKind())
+      return false;
+    if (TA.Addr->getRegRef() != RA.Addr->getRegRef())
+      return false;
+    return true;
+  };
+  auto RelatedStmt = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
+    return Related(TA) &&
+           &RA.Addr->getOp() == &TA.Addr->getOp();
+  };
+  auto RelatedPhi = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
+    if (!Related(TA))
+      return false;
+    if (TA.Addr->getKind() != NodeAttrs::Use)
+      return true;
+    // For phi uses, compare predecessor blocks.
+    const NodeAddr<const PhiUseNode*> TUA = TA;
+    const NodeAddr<const PhiUseNode*> RUA = RA;
+    return TUA.Addr->getPredecessor() == RUA.Addr->getPredecessor();
+  };
+
+  RegisterRef RR = RA.Addr->getRegRef();
+  if (IA.Addr->getKind() == NodeAttrs::Stmt)
+    return RA.Addr->getNextRef(RR, RelatedStmt, true, *this);
+  return RA.Addr->getNextRef(RR, RelatedPhi, true, *this);
+}
+
+// Find the next node related to RA in IA that satisfies condition P.
+// If such a node was found, return a pair where the second element is the
+// located node. If such a node does not exist, return a pair where the
+// first element is the element after which such a node should be inserted,
+// and the second element is a null-address.
+template <typename Predicate>
+std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
+DataFlowGraph::locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+      Predicate P) const {
+  assert(IA.Id != 0 && RA.Id != 0);
+
+  NodeAddr<RefNode*> NA;
+  NodeId Start = RA.Id;
+  while (true) {
+    NA = getNextRelated(IA, RA);
+    if (NA.Id == 0 || NA.Id == Start)
+      break;
+    if (P(NA))
+      break;
+    RA = NA;
+  }
+
+  if (NA.Id != 0 && NA.Id != Start)
+    return std::make_pair(RA, NA);
+  return std::make_pair(RA, NodeAddr<RefNode*>());
+}
+
+// Get the next shadow node in IA corresponding to RA, and optionally create
+// such a node if it does not exist.
+NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
+      NodeAddr<RefNode*> RA, bool Create) {
+  assert(IA.Id != 0 && RA.Id != 0);
+
+  uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
+  auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
+    return TA.Addr->getFlags() == Flags;
+  };
+  auto Loc = locateNextRef(IA, RA, IsShadow);
+  if (Loc.second.Id != 0 || !Create)
+    return Loc.second;
+
+  // Create a copy of RA and mark it as a shadow.
+  NodeAddr<RefNode*> NA = cloneNode(RA);
+  NA.Addr->setFlags(Flags | NodeAttrs::Shadow);
+  IA.Addr->addMemberAfter(Loc.first, NA, *this);
+  return NA;
+}
+
+// Get the next shadow node in IA corresponding to RA. Return null-address
+// if such a node does not exist.
+NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
+      NodeAddr<RefNode*> RA) const {
+  assert(IA.Id != 0 && RA.Id != 0);
+  uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
+  auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
+    return TA.Addr->getFlags() == Flags;
+  };
+  return locateNextRef(IA, RA, IsShadow).second;
+}
+
+// Create a new statement node in the block node BA that corresponds to
+// the machine instruction MI.
+void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
+  auto SA = newStmt(BA, &In);
+
+  // Collect a set of registers that this instruction implicitly uses
+  // or defines. 
Implicit operands from an instruction will be ignored + // unless they are listed here. + RegisterSet ImpUses, ImpDefs; + if (const uint16_t *ImpD = In.getDesc().getImplicitDefs()) + while (uint16_t R = *ImpD++) + ImpDefs.insert({R, 0}); + if (const uint16_t *ImpU = In.getDesc().getImplicitUses()) + while (uint16_t R = *ImpU++) + ImpUses.insert({R, 0}); + + bool IsCall = In.isCall(), IsReturn = In.isReturn(); + bool IsPredicated = TII.isPredicated(&In); + unsigned NumOps = In.getNumOperands(); + + // Avoid duplicate implicit defs. This will not detect cases of implicit + // defs that define registers that overlap, but it is not clear how to + // interpret that in the absence of explicit defs. Overlapping explicit + // defs are likely illegal already. + RegisterSet DoneDefs; + // Process explicit defs first. + for (unsigned OpN = 0; OpN < NumOps; ++OpN) { + MachineOperand &Op = In.getOperand(OpN); + if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) + continue; + RegisterRef RR = { Op.getReg(), Op.getSubReg() }; + uint16_t Flags = NodeAttrs::None; + if (TOI.isPreserving(In, OpN)) + Flags |= NodeAttrs::Preserving; + if (TOI.isClobbering(In, OpN)) + Flags |= NodeAttrs::Clobbering; + if (TOI.isFixedReg(In, OpN)) + Flags |= NodeAttrs::Fixed; + NodeAddr<DefNode*> DA = newDef(SA, Op, Flags); + SA.Addr->addMember(DA, *this); + DoneDefs.insert(RR); + } + + // Process implicit defs, skipping those that have already been added + // as explicit. + for (unsigned OpN = 0; OpN < NumOps; ++OpN) { + MachineOperand &Op = In.getOperand(OpN); + if (!Op.isReg() || !Op.isDef() || !Op.isImplicit()) + continue; + RegisterRef RR = { Op.getReg(), Op.getSubReg() }; + if (!IsCall && !ImpDefs.count(RR)) + continue; + if (DoneDefs.count(RR)) + continue; + uint16_t Flags = NodeAttrs::None; + if (TOI.isPreserving(In, OpN)) + Flags |= NodeAttrs::Preserving; + if (TOI.isClobbering(In, OpN)) + Flags |= NodeAttrs::Clobbering; + if (TOI.isFixedReg(In, OpN)) + Flags |= NodeAttrs::Fixed; + NodeAddr<DefNode*> DA = newDef(SA, Op, Flags); + SA.Addr->addMember(DA, *this); + DoneDefs.insert(RR); + } + + for (unsigned OpN = 0; OpN < NumOps; ++OpN) { + MachineOperand &Op = In.getOperand(OpN); + if (!Op.isReg() || !Op.isUse()) + continue; + RegisterRef RR = { Op.getReg(), Op.getSubReg() }; + // Add implicit uses on return and call instructions, and on predicated + // instructions regardless of whether or not they appear in the instruction + // descriptor's list. + bool Implicit = Op.isImplicit(); + bool TakeImplicit = IsReturn || IsCall || IsPredicated; + if (Implicit && !TakeImplicit && !ImpUses.count(RR)) + continue; + uint16_t Flags = NodeAttrs::None; + if (TOI.isFixedReg(In, OpN)) + Flags |= NodeAttrs::Fixed; + NodeAddr<UseNode*> UA = newUse(SA, Op, Flags); + SA.Addr->addMember(UA, *this); + } +} + +// Build a map that for each block will have the set of all references from +// that block, and from all blocks dominated by it. 
+void DataFlowGraph::buildBlockRefs(NodeAddr<BlockNode*> BA,
+      BlockRefsMap &RefM) {
+  auto &Refs = RefM[BA.Id];
+  MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
+  assert(N);
+  for (auto I : *N) {
+    MachineBasicBlock *SB = I->getBlock();
+    auto SBA = Func.Addr->findBlock(SB, *this);
+    buildBlockRefs(SBA, RefM);
+    const auto &SRs = RefM[SBA.Id];
+    Refs.insert(SRs.begin(), SRs.end());
+  }
+
+  for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
+    for (NodeAddr<RefNode*> RA : IA.Addr->members(*this))
+      Refs.insert(RA.Addr->getRegRef());
+}
+
+// Scan all defs in the block node BA and record in PhiM the locations of
+// phi nodes corresponding to these defs.
+void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+      NodeAddr<BlockNode*> BA) {
+  // Check all defs from block BA and record them in each block in BA's
+  // iterated dominance frontier. This information will later be used to
+  // create phi nodes.
+  MachineBasicBlock *BB = BA.Addr->getCode();
+  assert(BB);
+  auto DFLoc = MDF.find(BB);
+  if (DFLoc == MDF.end() || DFLoc->second.empty())
+    return;
+
+  // Traverse all instructions in the block and collect the set of all
+  // defined references. For each reference there will be a phi created
+  // in the block's iterated dominance frontier.
+  // This is done to make sure that each defined reference gets only one
+  // phi node, even if it is defined multiple times.
+  RegisterSet Defs;
+  for (auto I : BA.Addr->members(*this)) {
+    assert(I.Addr->getType() == NodeAttrs::Code);
+    assert(I.Addr->getKind() == NodeAttrs::Phi ||
+           I.Addr->getKind() == NodeAttrs::Stmt);
+    NodeAddr<InstrNode*> IA = I;
+    for (NodeAddr<RefNode*> RA : IA.Addr->members_if(IsDef, *this))
+      Defs.insert(RA.Addr->getRegRef());
+  }
+
+  // Finally, add the set of defs to each block in the iterated dominance
+  // frontier.
+  const MachineDominanceFrontier::DomSetType &DF = DFLoc->second;
+  SetVector<MachineBasicBlock*> IDF(DF.begin(), DF.end());
+  for (unsigned i = 0; i < IDF.size(); ++i) {
+    auto F = MDF.find(IDF[i]);
+    if (F != MDF.end())
+      IDF.insert(F->second.begin(), F->second.end());
+  }
+
+  // Get the register references that are reachable from this block.
+  RegisterSet &Refs = RefM[BA.Id];
+  for (auto DB : IDF) {
+    auto DBA = Func.Addr->findBlock(DB, *this);
+    const auto &Rs = RefM[DBA.Id];
+    Refs.insert(Rs.begin(), Rs.end());
+  }
+
+  for (auto DB : IDF) {
+    auto DBA = Func.Addr->findBlock(DB, *this);
+    PhiM[DBA.Id].insert(Defs.begin(), Defs.end());
+  }
+}
+
+// Given the locations of phi nodes in the map PhiM, create the phi nodes
+// that are located in the block node BA.
+void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+      NodeAddr<BlockNode*> BA) {
+  // Check if this block has any DF defs, i.e. if there are any defs
+  // that this block is in the iterated dominance frontier of.
+  auto HasDF = PhiM.find(BA.Id);
+  if (HasDF == PhiM.end() || HasDF->second.empty())
+    return;
+
+  // First, remove all R in Refs such that there exists T in Refs that
+  // covers R. In other words, only leave those refs that are not covered
+  // by another ref (i.e. maximal with respect to covering). 
+
+  auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
+    for (auto I : RRs)
+      if (I != RR && RAI.covers(I, RR))
+        RR = I;
+    return RR;
+  };
+
+  RegisterSet MaxDF;
+  for (auto I : HasDF->second)
+    MaxDF.insert(MaxCoverIn(I, HasDF->second));
+
+  std::vector<RegisterRef> MaxRefs;
+  auto &RefB = RefM[BA.Id];
+  for (auto I : MaxDF)
+    MaxRefs.push_back(MaxCoverIn(I, RefB));
+
+  // Now, for each R in MaxRefs, get the alias closure of R. If the closure
+  // only has R in it, create a phi with a def for R. Otherwise, create a
+  // phi, and add a def for each S in the closure.
+
+  // Sort the refs so that the phis will be created in a deterministic order.
+  std::sort(MaxRefs.begin(), MaxRefs.end());
+  // Remove duplicates.
+  auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
+  MaxRefs.erase(NewEnd, MaxRefs.end());
+
+  auto Aliased = [this,&MaxRefs](RegisterRef RR,
+                                 std::vector<unsigned> &Closure) -> bool {
+    for (auto I : Closure)
+      if (RAI.alias(RR, MaxRefs[I]))
+        return true;
+    return false;
+  };
+
+  // Prepare a list of NodeIds of the block's predecessors.
+  std::vector<NodeId> PredList;
+  const MachineBasicBlock *MBB = BA.Addr->getCode();
+  for (auto PB : MBB->predecessors()) {
+    auto B = Func.Addr->findBlock(PB, *this);
+    PredList.push_back(B.Id);
+  }
+
+  while (!MaxRefs.empty()) {
+    // Put the first element in the closure, and then add all subsequent
+    // elements from MaxRefs to it, if they alias at least one element
+    // already in the closure.
+    // ClosureIdx: vector of indices in MaxRefs of members of the closure.
+    std::vector<unsigned> ClosureIdx = { 0 };
+    for (unsigned i = 1; i != MaxRefs.size(); ++i)
+      if (Aliased(MaxRefs[i], ClosureIdx))
+        ClosureIdx.push_back(i);
+
+    // Build a phi for the closure.
+    unsigned CS = ClosureIdx.size();
+    NodeAddr<PhiNode*> PA = newPhi(BA);
+
+    // Add defs.
+    for (unsigned X = 0; X != CS; ++X) {
+      RegisterRef RR = MaxRefs[ClosureIdx[X]];
+      uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+      NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+      PA.Addr->addMember(DA, *this);
+    }
+    // Add phi uses.
+    for (auto P : PredList) {
+      auto PBA = addr<BlockNode*>(P);
+      for (unsigned X = 0; X != CS; ++X) {
+        RegisterRef RR = MaxRefs[ClosureIdx[X]];
+        NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
+        PA.Addr->addMember(PUA, *this);
+      }
+    }
+
+    // Erase from MaxRefs all elements in the closure.
+    auto Begin = MaxRefs.begin();
+    for (unsigned i = ClosureIdx.size(); i != 0; --i)
+      MaxRefs.erase(Begin + ClosureIdx[i-1]);
+  }
+}
+
+// Remove any unneeded phi nodes that were created during the build process.
+void DataFlowGraph::removeUnusedPhis() {
+  // This will remove unused phis, i.e. phis where each def does not reach
+  // any uses or other defs. This will not detect or remove circular phi
+  // chains that are otherwise dead. Unused/dead phis are created during
+  // the build process and this function is intended to remove these cases
+  // that are easily determinable to be unnecessary.
+
+  SetVector<NodeId> PhiQ;
+  for (NodeAddr<BlockNode*> BA : Func.Addr->members(*this)) {
+    for (auto P : BA.Addr->members_if(IsPhi, *this))
+      PhiQ.insert(P.Id);
+  }
+
+  static auto HasUsedDef = [](NodeList &Ms) -> bool {
+    for (auto M : Ms) {
+      if (M.Addr->getKind() != NodeAttrs::Def)
+        continue;
+      NodeAddr<DefNode*> DA = M;
+      if (DA.Addr->getReachedDef() != 0 || DA.Addr->getReachedUse() != 0)
+        return true;
+    }
+    return false;
+  };
+
+  // Any phi, if it is removed, may affect other phis (make them dead). 
+ // For each removed phi, collect the potentially affected phis and add + // them back to the queue. + while (!PhiQ.empty()) { + auto PA = addr<PhiNode*>(PhiQ[0]); + PhiQ.remove(PA.Id); + NodeList Refs = PA.Addr->members(*this); + if (HasUsedDef(Refs)) + continue; + for (NodeAddr<RefNode*> RA : Refs) { + if (NodeId RD = RA.Addr->getReachingDef()) { + auto RDA = addr<DefNode*>(RD); + NodeAddr<InstrNode*> OA = RDA.Addr->getOwner(*this); + if (IsPhi(OA)) + PhiQ.insert(OA.Id); + } + if (RA.Addr->isDef()) + unlinkDef(RA); + else + unlinkUse(RA); + } + NodeAddr<BlockNode*> BA = PA.Addr->getOwner(*this); + BA.Addr->removeMember(PA, *this); + } +} + +// For a given reference node TA in an instruction node IA, connect the +// reaching def of TA to the appropriate def node. Create any shadow nodes +// as appropriate. +template <typename T> +void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA, + DefStack &DS) { + if (DS.empty()) + return; + RegisterRef RR = TA.Addr->getRegRef(); + NodeAddr<T> TAP; + + // References from the def stack that have been examined so far. + RegisterSet Defs; + + for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) { + RegisterRef QR = I->Addr->getRegRef(); + auto AliasQR = [QR,this] (RegisterRef RR) -> bool { + return RAI.alias(QR, RR); + }; + bool PrecUp = RAI.covers(QR, RR); + // Skip all defs that are aliased to any of the defs that we have already + // seen. If we encounter a covering def, stop the stack traversal early. + if (std::any_of(Defs.begin(), Defs.end(), AliasQR)) { + if (PrecUp) + break; + continue; + } + // The reaching def. + NodeAddr<DefNode*> RDA = *I; + + // Pick the reached node. + if (TAP.Id == 0) { + TAP = TA; + } else { + // Mark the existing ref as "shadow" and create a new shadow. + TAP.Addr->setFlags(TAP.Addr->getFlags() | NodeAttrs::Shadow); + TAP = getNextShadow(IA, TAP, true); + } + + // Create the link. + TAP.Addr->linkToDef(TAP.Id, RDA); + + if (PrecUp) + break; + Defs.insert(QR); + } +} + +// Create data-flow links for all reference nodes in the statement node SA. +void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA) { + RegisterSet Defs; + + // Link all nodes (upwards in the data-flow) with their reaching defs. + for (NodeAddr<RefNode*> RA : SA.Addr->members(*this)) { + uint16_t Kind = RA.Addr->getKind(); + assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use); + RegisterRef RR = RA.Addr->getRegRef(); + // Do not process multiple defs of the same reference. + if (Kind == NodeAttrs::Def && Defs.count(RR)) + continue; + Defs.insert(RR); + + auto F = DefM.find(RR); + if (F == DefM.end()) + continue; + DefStack &DS = F->second; + if (Kind == NodeAttrs::Use) + linkRefUp<UseNode*>(SA, RA, DS); + else if (Kind == NodeAttrs::Def) + linkRefUp<DefNode*>(SA, RA, DS); + else + llvm_unreachable("Unexpected node in instruction"); + } +} + +// Create data-flow links for all instructions in the block node BA. This +// will include updating any phi nodes in BA. +void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) { + // Push block delimiters. + markBlock(BA.Id, DefM); + + // For each non-phi instruction in the block, link all the defs and uses + // to their reaching defs. For any member of the block (including phis), + // push the defs on the corresponding stacks. + for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) { + // Ignore phi nodes here. They will be linked part by part from the + // predecessors. 
+ if (IA.Addr->getKind() == NodeAttrs::Stmt) + linkStmtRefs(DefM, IA); + + // Push the definitions on the stack. + pushDefs(IA, DefM); + } + + // Recursively process all children in the dominator tree. + MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode()); + for (auto I : *N) { + MachineBasicBlock *SB = I->getBlock(); + auto SBA = Func.Addr->findBlock(SB, *this); + linkBlockRefs(DefM, SBA); + } + + // Link the phi uses from the successor blocks. + auto IsUseForBA = [BA](NodeAddr<NodeBase*> NA) -> bool { + if (NA.Addr->getKind() != NodeAttrs::Use) + return false; + assert(NA.Addr->getFlags() & NodeAttrs::PhiRef); + NodeAddr<PhiUseNode*> PUA = NA; + return PUA.Addr->getPredecessor() == BA.Id; + }; + MachineBasicBlock *MBB = BA.Addr->getCode(); + for (auto SB : MBB->successors()) { + auto SBA = Func.Addr->findBlock(SB, *this); + for (NodeAddr<InstrNode*> IA : SBA.Addr->members_if(IsPhi, *this)) { + // Go over each phi use associated with MBB, and link it. + for (auto U : IA.Addr->members_if(IsUseForBA, *this)) { + NodeAddr<PhiUseNode*> PUA = U; + RegisterRef RR = PUA.Addr->getRegRef(); + linkRefUp<UseNode*>(IA, PUA, DefM[RR]); + } + } + } + + // Pop all defs from this block from the definition stacks. + releaseBlock(BA.Id, DefM); +} + +// Remove the use node UA from any data-flow and structural links. +void DataFlowGraph::unlinkUse(NodeAddr<UseNode*> UA) { + NodeId RD = UA.Addr->getReachingDef(); + NodeId Sib = UA.Addr->getSibling(); + + NodeAddr<InstrNode*> IA = UA.Addr->getOwner(*this); + IA.Addr->removeMember(UA, *this); + + if (RD == 0) { + assert(Sib == 0); + return; + } + + auto RDA = addr<DefNode*>(RD); + auto TA = addr<UseNode*>(RDA.Addr->getReachedUse()); + if (TA.Id == UA.Id) { + RDA.Addr->setReachedUse(Sib); + return; + } + + while (TA.Id != 0) { + NodeId S = TA.Addr->getSibling(); + if (S == UA.Id) { + TA.Addr->setSibling(UA.Addr->getSibling()); + return; + } + TA = addr<UseNode*>(S); + } +} + +// Remove the def node DA from any data-flow and structural links. +void DataFlowGraph::unlinkDef(NodeAddr<DefNode*> DA) { + // + // RD + // | reached + // | def + // : + // . + // +----+ + // ... -- | DA | -- ... -- 0 : sibling chain of DA + // +----+ + // | | reached + // | : def + // | . + // | ... : Siblings (defs) + // | + // : reached + // . use + // ... : sibling chain of reached uses + + NodeId RD = DA.Addr->getReachingDef(); + + // Visit all siblings of the reached def and reset their reaching defs. + // Also, defs reached by DA are now "promoted" to being reached by RD, + // so all of them will need to be spliced into the sibling chain where + // DA belongs. + auto getAllNodes = [this] (NodeId N) -> NodeList { + NodeList Res; + while (N) { + auto RA = addr<RefNode*>(N); + // Keep the nodes in the exact sibling order. + Res.push_back(RA); + N = RA.Addr->getSibling(); + } + return Res; + }; + NodeList ReachedDefs = getAllNodes(DA.Addr->getReachedDef()); + NodeList ReachedUses = getAllNodes(DA.Addr->getReachedUse()); + + if (RD == 0) { + for (NodeAddr<RefNode*> I : ReachedDefs) + I.Addr->setSibling(0); + for (NodeAddr<RefNode*> I : ReachedUses) + I.Addr->setSibling(0); + } + for (NodeAddr<DefNode*> I : ReachedDefs) + I.Addr->setReachingDef(RD); + for (NodeAddr<UseNode*> I : ReachedUses) + I.Addr->setReachingDef(RD); + + NodeId Sib = DA.Addr->getSibling(); + if (RD == 0) { + assert(Sib == 0); + return; + } + + // Update the reaching def node and remove DA from the sibling list. 
+ auto RDA = addr<DefNode*>(RD); + auto TA = addr<DefNode*>(RDA.Addr->getReachedDef()); + if (TA.Id == DA.Id) { + // If DA is the first reached def, just update the RD's reached def + // to the DA's sibling. + RDA.Addr->setReachedDef(Sib); + } else { + // Otherwise, traverse the sibling list of the reached defs and remove + // DA from it. + while (TA.Id != 0) { + NodeId S = TA.Addr->getSibling(); + if (S == DA.Id) { + TA.Addr->setSibling(Sib); + break; + } + TA = addr<DefNode*>(S); + } + } + + // Splice the DA's reached defs into the RDA's reached def chain. + if (!ReachedDefs.empty()) { + auto Last = NodeAddr<DefNode*>(ReachedDefs.back()); + Last.Addr->setSibling(RDA.Addr->getReachedDef()); + RDA.Addr->setReachedDef(ReachedDefs.front().Id); + } + // Splice the DA's reached uses into the RDA's reached use chain. + if (!ReachedUses.empty()) { + auto Last = NodeAddr<UseNode*>(ReachedUses.back()); + Last.Addr->setSibling(RDA.Addr->getReachedUse()); + RDA.Addr->setReachedUse(ReachedUses.front().Id); + } + + NodeAddr<InstrNode*> IA = DA.Addr->getOwner(*this); + IA.Addr->removeMember(DA, *this); +} diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h new file mode 100644 index 0000000..7da7bb5 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h @@ -0,0 +1,841 @@ +//===--- RDFGraph.h -------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Target-independent, SSA-based data flow graph for register data flow (RDF) +// for a non-SSA program representation (e.g. post-RA machine code). +// +// +// *** Introduction +// +// The RDF graph is a collection of nodes, each of which denotes some element +// of the program. There are two main types of such elements: code and refe- +// rences. Conceptually, "code" is something that represents the structure +// of the program, e.g. basic block or a statement, while "reference" is an +// instance of accessing a register, e.g. a definition or a use. Nodes are +// connected with each other based on the structure of the program (such as +// blocks, instructions, etc.), and based on the data flow (e.g. reaching +// definitions, reached uses, etc.). The single-reaching-definition principle +// of SSA is generally observed, although, due to the non-SSA representation +// of the program, there are some differences between the graph and a "pure" +// SSA representation. +// +// +// *** Implementation remarks +// +// Since the graph can contain a large number of nodes, memory consumption +// was one of the major design considerations. As a result, there is a single +// base class NodeBase which defines all members used by all possible derived +// classes. The members are arranged in a union, and a derived class cannot +// add any data members of its own. Each derived class only defines the +// functional interface, i.e. member functions. NodeBase must be a POD, +// which implies that all of its members must also be PODs. +// Since nodes need to be connected with other nodes, pointers have been +// replaced with 32-bit identifiers: each node has an id of type NodeId. +// There are mapping functions in the graph that translate between actual +// memory addresses and the corresponding identifiers. +// A node id of 0 is equivalent to nullptr. 
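To make the id scheme above concrete, here is a freestanding toy (not the patch's allocator; DemoNode and DemoPool are invented names for illustration) showing how 1-based ids over flat storage let id 0 stand in for nullptr:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Toy illustration only, NOT the real NodeAllocator below.
  struct DemoNode { uint32_t Next = 0; };

  struct DemoPool {
    std::vector<DemoNode> Storage;
    uint32_t id(const DemoNode *P) const {   // address -> id
      return P ? static_cast<uint32_t>(P - Storage.data()) + 1 : 0;
    }
    DemoNode *ptr(uint32_t Id) {             // id -> address
      return Id ? &Storage[Id - 1] : nullptr;
    }
  };

  int main() {
    DemoPool Pool;
    Pool.Storage.resize(4);
    DemoNode *N = Pool.ptr(3);
    assert(Pool.id(N) == 3);
    assert(Pool.ptr(0) == nullptr);  // id 0 plays the role of nullptr
    // Caveat: vector storage can move on growth; the real allocator keeps
    // nodes in fixed-size blocks so addresses stay stable.
    return 0;
  }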
+// +// +// *** Structure of the graph +// +// A code node is always a collection of other nodes. For example, a code +// node corresponding to a basic block will contain code nodes corresponding +// to instructions. In turn, a code node corresponding to an instruction will +// contain a list of reference nodes that correspond to the definitions and +// uses of registers in that instruction. The members are arranged into a +// circular list, which is yet another consequence of the effort to save +// memory: for each member node it should be possible to obtain its owner, +// and it should be possible to access all other members. There are other +// ways to accomplish that, but the circular list seemed the most natural. +// +// +- CodeNode -+ +// | | <---------------------------------------------------+ +// +-+--------+-+ | +// |FirstM |LastM | +// | +-------------------------------------+ | +// | | | +// V V | +// +----------+ Next +----------+ Next Next +----------+ Next | +// | |----->| |-----> ... ----->| |----->-+ +// +- Member -+ +- Member -+ +- Member -+ +// +// The order of members is such that related reference nodes (see below) +// should be contiguous on the member list. +// +// A reference node is a node that encapsulates an access to a register, +// in other words, data flowing into or out of a register. There are two +// major kinds of reference nodes: defs and uses. A def node will contain +// the id of the first reached use, and the id of the first reached def. +// Each def and use will contain the id of the reaching def, and also the +// id of the next reached def (for def nodes) or use (for use nodes). +// The "next node sharing the same reaching def" is denoted as "sibling". +// In summary: +// - Def node contains: reaching def, sibling, first reached def, and first +// reached use. +// - Use node contains: reaching def and sibling. +// +// +-- DefNode --+ +// | R2 = ... | <---+--------------------+ +// ++---------+--+ | | +// |Reached |Reached | | +// |Def |Use | | +// | | |Reaching |Reaching +// | V |Def |Def +// | +-- UseNode --+ Sib +-- UseNode --+ Sib Sib +// | | ... = R2 |----->| ... = R2 |----> ... ----> 0 +// | +-------------+ +-------------+ +// V +// +-- DefNode --+ Sib +// | R2 = ... |----> ... +// ++---------+--+ +// | | +// | | +// ... ... +// +// To get a full picture, the circular lists connecting blocks within a +// function, instructions within a block, etc. should be superimposed with +// the def-def, def-use links shown above. +// To illustrate this, consider a small example in a pseudo-assembly: +// foo: +// add r2, r0, r1 ; r2 = r0+r1 +// addi r0, r2, 1 ; r0 = r2+1 +// ret r0 ; return value in r0 +// +// The graph (in a format used by the debugging functions) would look like: +// +// DFG dump:[ +// f1: Function foo +// b2: === BB#0 === preds(0), succs(0): +// p3: phi [d4<r0>(,d12,u9):] +// p5: phi [d6<r1>(,,u10):] +// s7: add [d8<r2>(,,u13):, u9<r0>(d4):, u10<r1>(d6):] +// s11: addi [d12<r0>(d4,,u15):, u13<r2>(d8):] +// s14: ret [u15<r0>(d12):] +// ] +// +// The f1, b2, p3, etc. are node ids. The letter is prepended to indicate the +// kind of the node (i.e. f - function, b - basic block, p - phi, s - state- +// ment, d - def, u - use). +// The format of a def node is: +// dN<R>(rd,d,u):sib, +// where +// N - numeric node id, +// R - register being defined +// rd - reaching def, +// d - reached def, +// u - reached use, +// sib - sibling. 
+// The format of a use node is:
+//   uN<R>[!](rd):sib,
+// where
+//   N   - numeric node id,
+//   R   - register being used,
+//   rd  - reaching def,
+//   sib - sibling.
+// Possible annotations (usually preceding the node id):
+//   + - preserving def,
+//   ~ - clobbering def,
+//   " - shadow ref (follows the node id),
+//   ! - fixed register (appears after register name).
+//
+// The circular lists are not explicit in the dump.
+//
+//
+// *** Node attributes
+//
+// NodeBase has a member "Attrs", which is the primary way of determining
+// the node's characteristics. The fields in this member decide whether
+// the node is a code node or a reference node (i.e. node's "type"), then
+// within each type, the "kind" determines what specifically this node
+// represents. The remaining bits, "flags", contain additional information
+// that is even more detailed than the "kind".
+// CodeNode's kinds are:
+// - Phi:   Phi node, members are reference nodes.
+// - Stmt:  Statement, members are reference nodes.
+// - Block: Basic block, members are instruction nodes (i.e. Phi or Stmt).
+// - Func:  The whole function. The members are basic block nodes.
+// RefNode's kinds are:
+// - Use.
+// - Def.
+//
+// Meaning of flags:
+// - Preserving: applies only to defs. A preserving def is one that can
+//   preserve some of the original bits among those that are included in
+//   the register associated with that def. For example, if R0 is a 32-bit
+//   register, but a def can only change the lower 16 bits, then it will
+//   be marked as preserving.
+// - Shadow: a reference that has duplicates holding additional reaching
+//   defs (see more below).
+// - Clobbering: applies only to defs, indicates that the value generated
+//   by this def is unspecified. A typical example would be volatile
+//   registers after function calls.
+//
+//
+// *** Shadow references
+//
+// It may happen that a super-register can have two (or more) non-overlapping
+// sub-registers. When both of these sub-registers are defined and followed
+// by a use of the super-register, the use of the super-register will not
+// have a unique reaching def: both defs of the sub-registers need to be
+// accounted for. In such cases, a duplicate use of the super-register is
+// added and it points to the extra reaching def. Both uses are marked with
+// a flag "shadow". Example:
+// Assume t0 is a super-register of r0 and r1, r0 and r1 do not overlap:
+//   set r0, 1        ; r0 = 1
+//   set r1, 1        ; r1 = 1
+//   addi t1, t0, 1   ; t1 = t0+1
+//
+// The DFG:
+//   s1: set [d2<r0>(,,u7):]
+//   s3: set [d4<r1>(,,u8):]
+//   s5: addi [d6<t1>(,,):, u7"<t0>(d2):, u8"<t0>(d4):]
+//
+// The statement s5 has two use nodes for t0: u7" and u8". The quotation
+// mark " indicates that the node is a shadow. 
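As a quick sanity check of the attribute packing just described, the following freestanding snippet replays the type/kind/flag masking using the same constants that the NodeAttrs definition later in this header declares (a sketch for illustration only):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Masks and values copied from NodeAttrs below:
    // type in bits 0-1, kind in bits 2-4, flags in bits 5-9.
    const uint16_t TypeMask = 0x0003, Ref    = 0x0002;
    const uint16_t KindMask = 0x0007 << 2, Use = 0x0002 << 2;
    const uint16_t FlagMask = 0x001F << 5, Shadow = 0x0001 << 5;

    uint16_t Attrs = Ref | Use | Shadow;   // a shadow use such as u7" above
    assert((Attrs & TypeMask) == Ref);     // what NodeAttrs::type() extracts
    assert((Attrs & KindMask) == Use);     // what NodeAttrs::kind() extracts
    assert((Attrs & FlagMask) == Shadow);  // what NodeAttrs::flags() extracts
    return 0;
  }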
+//
+#ifndef RDF_GRAPH_H
+#define RDF_GRAPH_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <functional>
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+  class MachineBasicBlock;
+  class MachineFunction;
+  class MachineInstr;
+  class MachineOperand;
+  class MachineDominanceFrontier;
+  class MachineDominatorTree;
+  class TargetInstrInfo;
+  class TargetRegisterInfo;
+}
+
+namespace rdf {
+  typedef uint32_t NodeId;
+
+  struct NodeAttrs {
+    enum : uint16_t {
+      None       = 0x0000,      // Nothing
+
+      // Types: 2 bits
+      TypeMask   = 0x0003,
+      Code       = 0x0001,      // 01, Container
+      Ref        = 0x0002,      // 10, Reference
+
+      // Kind: 3 bits
+      KindMask   = 0x0007 << 2,
+      Def        = 0x0001 << 2, // 001
+      Use        = 0x0002 << 2, // 010
+      Phi        = 0x0003 << 2, // 011
+      Stmt       = 0x0004 << 2, // 100
+      Block      = 0x0005 << 2, // 101
+      Func       = 0x0006 << 2, // 110
+
+      // Flags: 5 bits for now
+      FlagMask   = 0x001F << 5,
+      Shadow     = 0x0001 << 5, // 00001, Has extra reaching defs.
+      Clobbering = 0x0002 << 5, // 00010, Produces unspecified values.
+      PhiRef     = 0x0004 << 5, // 00100, Member of PhiNode.
+      Preserving = 0x0008 << 5, // 01000, Def can keep original bits.
+      Fixed      = 0x0010 << 5, // 10000, Fixed register.
+    };
+
+    static uint16_t type(uint16_t T)  { return T & TypeMask; }
+    static uint16_t kind(uint16_t T)  { return T & KindMask; }
+    static uint16_t flags(uint16_t T) { return T & FlagMask; }
+
+    static uint16_t set_type(uint16_t A, uint16_t T) {
+      return (A & ~TypeMask) | T;
+    }
+    static uint16_t set_kind(uint16_t A, uint16_t K) {
+      return (A & ~KindMask) | K;
+    }
+    static uint16_t set_flags(uint16_t A, uint16_t F) {
+      return (A & ~FlagMask) | F;
+    }
+
+    // Test if A contains B.
+    static bool contains(uint16_t A, uint16_t B) {
+      if (type(A) != Code)
+        return false;
+      uint16_t KB = kind(B);
+      switch (kind(A)) {
+      case Func:
+        return KB == Block;
+      case Block:
+        return KB == Phi || KB == Stmt;
+      case Phi:
+      case Stmt:
+        return type(B) == Ref;
+      }
+      return false;
+    }
+  };
+
+  template <typename T> struct NodeAddr {
+    NodeAddr() : Addr(nullptr), Id(0) {}
+    NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
+    NodeAddr(const NodeAddr&) = default;
+    NodeAddr &operator= (const NodeAddr&) = default;
+
+    bool operator== (const NodeAddr<T> &NA) const {
+      assert((Addr == NA.Addr) == (Id == NA.Id));
+      return Addr == NA.Addr;
+    }
+    bool operator!= (const NodeAddr<T> &NA) const {
+      return !operator==(NA);
+    }
+    // Type cast (casting constructor). This is the reason for having this
+    // class instead of std::pair.
+    template <typename S> NodeAddr(const NodeAddr<S> &NA)
+      : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
+
+    T Addr;
+    NodeId Id;
+  };
+
+  struct NodeBase;
+
+  // Fast memory allocation and translation between node id and node address.
+  // This is really the same idea as the one underlying the "bump pointer
+  // allocator", the difference being in the translation. A node id is
+  // composed of two components: the index of the block in which it was
+  // allocated, and the index within the block. With the default settings,
+  // where the number of nodes per block is 4096, the node id (minus 1) is:
+  //
+  // bit position:                11             0
+  // +----------------------------+--------------+
+  // | Index of the block         |Index in block|
+  // +----------------------------+--------------+
+  //
+  // The actual node id is the above plus 1, to avoid creating a node id of 0.
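+  //
+  // For example, with the default 4096 nodes per block, node id 5000
+  // denotes N-1 = 4999, i.e. block 4999 >> 12 = 1 and index
+  // 4999 & 4095 = 903 within that block (byte offset 903 * 32, given
+  // the 32-byte node size used by the allocator below).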
+ // + // This method significantly improved the build time, compared to using maps + // (std::unordered_map or DenseMap) to translate between pointers and ids. + struct NodeAllocator { + // Amount of storage for a single node. + enum { NodeMemSize = 32 }; + NodeAllocator(uint32_t NPB = 4096) + : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)), + IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) { + assert(isPowerOf2_32(NPB)); + } + NodeBase *ptr(NodeId N) const { + uint32_t N1 = N-1; + uint32_t BlockN = N1 >> BitsPerIndex; + uint32_t Offset = (N1 & IndexMask) * NodeMemSize; + return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset); + } + NodeId id(const NodeBase *P) const; + NodeAddr<NodeBase*> New(); + void clear(); + + private: + void startNewBlock(); + bool needNewBlock(); + uint32_t makeId(uint32_t Block, uint32_t Index) const { + // Add 1 to the id, to avoid the id of 0, which is treated as "null". + return ((Block << BitsPerIndex) | Index) + 1; + } + + const uint32_t NodesPerBlock; + const uint32_t BitsPerIndex; + const uint32_t IndexMask; + char *ActiveEnd; + std::vector<char*> Blocks; + typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy; + AllocatorTy MemPool; + }; + + struct RegisterRef { + unsigned Reg, Sub; + + // No non-trivial constructors, since this will be a member of a union. + RegisterRef() = default; + RegisterRef(const RegisterRef &RR) = default; + RegisterRef &operator= (const RegisterRef &RR) = default; + bool operator== (const RegisterRef &RR) const { + return Reg == RR.Reg && Sub == RR.Sub; + } + bool operator!= (const RegisterRef &RR) const { + return !operator==(RR); + } + bool operator< (const RegisterRef &RR) const { + return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub); + } + }; + typedef std::set<RegisterRef> RegisterSet; + + struct RegisterAliasInfo { + RegisterAliasInfo(const TargetRegisterInfo &tri) : TRI(tri) {} + virtual ~RegisterAliasInfo() {} + + virtual std::vector<RegisterRef> getAliasSet(RegisterRef RR) const; + virtual bool alias(RegisterRef RA, RegisterRef RB) const; + virtual bool covers(RegisterRef RA, RegisterRef RB) const; + virtual bool covers(const RegisterSet &RRs, RegisterRef RR) const; + + const TargetRegisterInfo &TRI; + }; + + struct TargetOperandInfo { + TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {} + virtual ~TargetOperandInfo() {} + virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const; + virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const; + virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const; + + const TargetInstrInfo &TII; + }; + + + struct DataFlowGraph; + + struct NodeBase { + public: + // Make sure this is a POD. + NodeBase() = default; + uint16_t getType() const { return NodeAttrs::type(Attrs); } + uint16_t getKind() const { return NodeAttrs::kind(Attrs); } + uint16_t getFlags() const { return NodeAttrs::flags(Attrs); } + NodeId getNext() const { return Next; } + + uint16_t getAttrs() const { return Attrs; } + void setAttrs(uint16_t A) { Attrs = A; } + void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); } + + // Insert node NA after "this" in the circular chain. + void append(NodeAddr<NodeBase*> NA); + // Initialize all members to 0. + void init() { memset(this, 0, sizeof *this); } + void setNext(NodeId N) { Next = N; } + + protected: + uint16_t Attrs; + uint16_t Reserved; + NodeId Next; // Id of the next node in the circular chain. + // Definitions of nested types. 
Using anonymous nested structs would make + // this class definition clearer, but unnamed structs are not a part of + // the standard. + struct Def_struct { + NodeId DD, DU; // Ids of the first reached def and use. + }; + struct PhiU_struct { + NodeId PredB; // Id of the predecessor block for a phi use. + }; + struct Code_struct { + void *CP; // Pointer to the actual code. + NodeId FirstM, LastM; // Id of the first member and last. + }; + struct Ref_struct { + NodeId RD, Sib; // Ids of the reaching def and the sibling. + union { + Def_struct Def; + PhiU_struct PhiU; + }; + union { + MachineOperand *Op; // Non-phi refs point to a machine operand. + RegisterRef RR; // Phi refs store register info directly. + }; + }; + + // The actual payload. + union { + Ref_struct Ref; + Code_struct Code; + }; + }; + // The allocator allocates chunks of 32 bytes for each node. The fact that + // each node takes 32 bytes in memory is used for fast translation between + // the node id and the node address. + static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize, + "NodeBase must be at most NodeAllocator::NodeMemSize bytes"); + + typedef std::vector<NodeAddr<NodeBase*>> NodeList; + typedef std::set<NodeId> NodeSet; + + struct RefNode : public NodeBase { + RefNode() = default; + RegisterRef getRegRef() const; + MachineOperand &getOp() { + assert(!(getFlags() & NodeAttrs::PhiRef)); + return *Ref.Op; + } + void setRegRef(RegisterRef RR); + void setRegRef(MachineOperand *Op); + NodeId getReachingDef() const { + return Ref.RD; + } + void setReachingDef(NodeId RD) { + Ref.RD = RD; + } + NodeId getSibling() const { + return Ref.Sib; + } + void setSibling(NodeId Sib) { + Ref.Sib = Sib; + } + bool isUse() const { + assert(getType() == NodeAttrs::Ref); + return getKind() == NodeAttrs::Use; + } + bool isDef() const { + assert(getType() == NodeAttrs::Ref); + return getKind() == NodeAttrs::Def; + } + + template <typename Predicate> + NodeAddr<RefNode*> getNextRef(RegisterRef RR, Predicate P, bool NextOnly, + const DataFlowGraph &G); + NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G); + }; + + struct DefNode : public RefNode { + NodeId getReachedDef() const { + return Ref.Def.DD; + } + void setReachedDef(NodeId D) { + Ref.Def.DD = D; + } + NodeId getReachedUse() const { + return Ref.Def.DU; + } + void setReachedUse(NodeId U) { + Ref.Def.DU = U; + } + + void linkToDef(NodeId Self, NodeAddr<DefNode*> DA); + }; + + struct UseNode : public RefNode { + void linkToDef(NodeId Self, NodeAddr<DefNode*> DA); + }; + + struct PhiUseNode : public UseNode { + NodeId getPredecessor() const { + assert(getFlags() & NodeAttrs::PhiRef); + return Ref.PhiU.PredB; + } + void setPredecessor(NodeId B) { + assert(getFlags() & NodeAttrs::PhiRef); + Ref.PhiU.PredB = B; + } + }; + + struct CodeNode : public NodeBase { + template <typename T> T getCode() const { + return static_cast<T>(Code.CP); + } + void setCode(void *C) { + Code.CP = C; + } + + NodeAddr<NodeBase*> getFirstMember(const DataFlowGraph &G) const; + NodeAddr<NodeBase*> getLastMember(const DataFlowGraph &G) const; + void addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G); + void addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA, + const DataFlowGraph &G); + void removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G); + + NodeList members(const DataFlowGraph &G) const; + template <typename Predicate> + NodeList members_if(Predicate P, const DataFlowGraph &G) const; + }; + + struct InstrNode : public CodeNode { + NodeAddr<NodeBase*> getOwner(const 
DataFlowGraph &G); + }; + + struct PhiNode : public InstrNode { + MachineInstr *getCode() const { + return nullptr; + } + }; + + struct StmtNode : public InstrNode { + MachineInstr *getCode() const { + return CodeNode::getCode<MachineInstr*>(); + } + }; + + struct BlockNode : public CodeNode { + MachineBasicBlock *getCode() const { + return CodeNode::getCode<MachineBasicBlock*>(); + } + void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G); + }; + + struct FuncNode : public CodeNode { + MachineFunction *getCode() const { + return CodeNode::getCode<MachineFunction*>(); + } + NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB, + const DataFlowGraph &G) const; + NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G); + }; + + struct DataFlowGraph { + DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, + const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, + const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai, + const TargetOperandInfo &toi); + + NodeBase *ptr(NodeId N) const; + template <typename T> T ptr(NodeId N) const { + return static_cast<T>(ptr(N)); + } + NodeId id(const NodeBase *P) const; + + template <typename T> NodeAddr<T> addr(NodeId N) const { + return { ptr<T>(N), N }; + } + + NodeAddr<FuncNode*> getFunc() const { + return Func; + } + MachineFunction &getMF() const { + return MF; + } + const TargetInstrInfo &getTII() const { + return TII; + } + const TargetRegisterInfo &getTRI() const { + return TRI; + } + const MachineDominatorTree &getDT() const { + return MDT; + } + const MachineDominanceFrontier &getDF() const { + return MDF; + } + const RegisterAliasInfo &getRAI() const { + return RAI; + } + + struct DefStack { + DefStack() = default; + bool empty() const { return Stack.empty() || top() == bottom(); } + private: + typedef NodeAddr<DefNode*> value_type; + struct Iterator { + typedef DefStack::value_type value_type; + Iterator &up() { Pos = DS.nextUp(Pos); return *this; } + Iterator &down() { Pos = DS.nextDown(Pos); return *this; } + value_type operator*() const { + assert(Pos >= 1); + return DS.Stack[Pos-1]; + } + const value_type *operator->() const { + assert(Pos >= 1); + return &DS.Stack[Pos-1]; + } + bool operator==(const Iterator &It) const { return Pos == It.Pos; } + bool operator!=(const Iterator &It) const { return Pos != It.Pos; } + private: + Iterator(const DefStack &S, bool Top); + // Pos-1 is the index in the StorageType object that corresponds to + // the top of the DefStack. 
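+        // The stack may also hold delimiter entries: records with a null
+        // Addr whose Id identifies a basic block (see isDelimiter,
+        // start_block and clear_block below). nextUp and nextDown skip
+        // such entries, which appears to be why the iterator is stepped
+        // through them rather than by a simple +/-1 on Pos.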
+ const DefStack &DS; + unsigned Pos; + friend struct DefStack; + }; + public: + typedef Iterator iterator; + iterator top() const { return Iterator(*this, true); } + iterator bottom() const { return Iterator(*this, false); } + unsigned size() const; + + void push(NodeAddr<DefNode*> DA) { Stack.push_back(DA); } + void pop(); + void start_block(NodeId N); + void clear_block(NodeId N); + private: + friend struct Iterator; + typedef std::vector<value_type> StorageType; + bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const { + return (P.Addr == nullptr) && (N == 0 || P.Id == N); + } + unsigned nextUp(unsigned P) const; + unsigned nextDown(unsigned P) const; + StorageType Stack; + }; + + typedef std::map<RegisterRef,DefStack> DefStackMap; + + void build(); + void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM); + void markBlock(NodeId B, DefStackMap &DefM); + void releaseBlock(NodeId B, DefStackMap &DefM); + + NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA) const; + NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA, bool Create); + NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA) const; + NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA, bool Create); + NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA) const; + + NodeList getRelatedRefs(NodeAddr<InstrNode*> IA, + NodeAddr<RefNode*> RA) const; + + void unlinkUse(NodeAddr<UseNode*> UA); + void unlinkDef(NodeAddr<DefNode*> DA); + + // Some useful filters. + template <uint16_t Kind> + static bool IsRef(const NodeAddr<NodeBase*> BA) { + return BA.Addr->getType() == NodeAttrs::Ref && + BA.Addr->getKind() == Kind; + } + template <uint16_t Kind> + static bool IsCode(const NodeAddr<NodeBase*> BA) { + return BA.Addr->getType() == NodeAttrs::Code && + BA.Addr->getKind() == Kind; + } + static bool IsDef(const NodeAddr<NodeBase*> BA) { + return BA.Addr->getType() == NodeAttrs::Ref && + BA.Addr->getKind() == NodeAttrs::Def; + } + static bool IsUse(const NodeAddr<NodeBase*> BA) { + return BA.Addr->getType() == NodeAttrs::Ref && + BA.Addr->getKind() == NodeAttrs::Use; + } + static bool IsPhi(const NodeAddr<NodeBase*> BA) { + return BA.Addr->getType() == NodeAttrs::Code && + BA.Addr->getKind() == NodeAttrs::Phi; + } + + private: + void reset(); + + NodeAddr<NodeBase*> newNode(uint16_t Attrs); + NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B); + NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner, + MachineOperand &Op, uint16_t Flags = NodeAttrs::None); + NodeAddr<PhiUseNode*> newPhiUse(NodeAddr<PhiNode*> Owner, + RegisterRef RR, NodeAddr<BlockNode*> PredB, + uint16_t Flags = NodeAttrs::PhiRef); + NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner, + MachineOperand &Op, uint16_t Flags = NodeAttrs::None); + NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner, + RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef); + NodeAddr<PhiNode*> newPhi(NodeAddr<BlockNode*> Owner); + NodeAddr<StmtNode*> newStmt(NodeAddr<BlockNode*> Owner, + MachineInstr *MI); + NodeAddr<BlockNode*> newBlock(NodeAddr<FuncNode*> Owner, + MachineBasicBlock *BB); + NodeAddr<FuncNode*> newFunc(MachineFunction *MF); + + template <typename Predicate> + std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>> + locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, + Predicate P) const; + + typedef std::map<NodeId,RegisterSet> BlockRefsMap; + + void buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In); + 
void buildBlockRefs(NodeAddr<BlockNode*> BA, BlockRefsMap &RefM); + void recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM, + NodeAddr<BlockNode*> BA); + void buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM, + NodeAddr<BlockNode*> BA); + void removeUnusedPhis(); + + template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA, + NodeAddr<T> TA, DefStack &DS); + void linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA); + void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA); + + TimerGroup TimeG; + NodeAddr<FuncNode*> Func; + NodeAllocator Memory; + + MachineFunction &MF; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + const MachineDominatorTree &MDT; + const MachineDominanceFrontier &MDF; + const RegisterAliasInfo &RAI; + const TargetOperandInfo &TOI; + }; // struct DataFlowGraph + + template <typename Predicate> + NodeAddr<RefNode*> RefNode::getNextRef(RegisterRef RR, Predicate P, + bool NextOnly, const DataFlowGraph &G) { + // Get the "Next" reference in the circular list that references RR and + // satisfies predicate "Pred". + auto NA = G.addr<NodeBase*>(getNext()); + + while (NA.Addr != this) { + if (NA.Addr->getType() == NodeAttrs::Ref) { + NodeAddr<RefNode*> RA = NA; + if (RA.Addr->getRegRef() == RR && P(NA)) + return NA; + if (NextOnly) + break; + NA = G.addr<NodeBase*>(NA.Addr->getNext()); + } else { + // We've hit the beginning of the chain. + assert(NA.Addr->getType() == NodeAttrs::Code); + NodeAddr<CodeNode*> CA = NA; + NA = CA.Addr->getFirstMember(G); + } + } + // Return the equivalent of "nullptr" if such a node was not found. + return NodeAddr<RefNode*>(); + } + + template <typename Predicate> + NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const { + NodeList MM; + auto M = getFirstMember(G); + if (M.Id == 0) + return MM; + + while (M.Addr != this) { + if (P(M)) + MM.push_back(M); + M = G.addr<NodeBase*>(M.Addr->getNext()); + } + return MM; + } + + + template <typename T> struct Print; + template <typename T> + raw_ostream &operator<< (raw_ostream &OS, const Print<T> &P); + + template <typename T> + struct Print { + Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {} + const T &Obj; + const DataFlowGraph &G; + }; + + template <typename T> + struct PrintNode : Print<NodeAddr<T>> { + PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g) + : Print<NodeAddr<T>>(x, g) {} + }; +} // namespace rdf + +#endif // RDF_GRAPH_H diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp new file mode 100644 index 0000000..1d9bd37 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp @@ -0,0 +1,848 @@ +//===--- RDFLiveness.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Computation of the liveness information from the data-flow graph. +// +// The main functionality of this code is to compute block live-in +// information. With the live-in information in place, the placement +// of kill flags can also be recalculated. +// +// The block live-in calculation is based on the ideas from the following +// publication: +// +// Dibyendu Das, Ramakrishna Upadrasta, Benoit Dupont de Dinechin. +// "Efficient Liveness Computation Using Merge Sets and DJ-Graphs." 
+// ACM Transactions on Architecture and Code Optimization, Association for +// Computing Machinery, 2012, ACM TACO Special Issue on "High-Performance +// and Embedded Architectures and Compilers", 8 (4), +// <10.1145/2086696.2086706>. <hal-00647369> +// +#include "RDFGraph.h" +#include "RDFLiveness.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; +using namespace rdf; + +namespace rdf { + template<> + raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) { + OS << '{'; + for (auto I : P.Obj) { + OS << ' ' << Print<RegisterRef>(I.first, P.G) << '{'; + for (auto J = I.second.begin(), E = I.second.end(); J != E; ) { + OS << Print<NodeId>(*J, P.G); + if (++J != E) + OS << ','; + } + OS << '}'; + } + OS << " }"; + return OS; + } +} + +// The order in the returned sequence is the order of reaching defs in the +// upward traversal: the first def is the closest to the given reference RefA, +// the next one is further up, and so on. +// The list ends at a reaching phi def, or when the reference from RefA is +// covered by the defs in the list (see FullChain). +// This function provides two modes of operation: +// (1) Returning the sequence of reaching defs for a particular reference +// node. This sequence will terminate at the first phi node [1]. +// (2) Returning a partial sequence of reaching defs, where the final goal +// is to traverse past phi nodes to the actual defs arising from the code +// itself. +// In mode (2), the register reference for which the search was started +// may be different from the reference node RefA, for which this call was +// made, hence the argument RefRR, which holds the original register. +// Also, some definitions may have already been encountered in a previous +// call that will influence register covering. The register references +// already defined are passed in through DefRRs. +// In mode (1), the "continuation" considerations do not apply, and the +// RefRR is the same as the register in RefA, and the set DefRRs is empty. +// +// [1] It is possible for multiple phi nodes to be included in the returned +// sequence: +// SubA = phi ... +// SubB = phi ... +// ... = SuperAB(rdef:SubA), SuperAB"(rdef:SubB) +// However, these phi nodes are independent from one another in terms of +// the data-flow. + +NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, + NodeAddr<RefNode*> RefA, bool FullChain, const RegisterSet &DefRRs) { + SetVector<NodeId> DefQ; + SetVector<NodeId> Owners; + + // The initial queue should not have reaching defs for shadows. The + // whole point of a shadow is that it will have a reaching def that + // is not aliased to the reaching defs of the related shadows. + NodeId Start = RefA.Id; + auto SNA = DFG.addr<RefNode*>(Start); + if (NodeId RD = SNA.Addr->getReachingDef()) + DefQ.insert(RD); + + // Collect all the reaching defs, going up until a phi node is encountered, + // or there are no more reaching defs. From this set, the actual set of + // reaching defs will be selected. + // The traversal upwards must go on until a covering def is encountered. + // It is possible that a collection of non-covering (individually) defs + // will be sufficient, but keep going until a covering one is found. 
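+  // Note that DefQ can grow while it is being processed: the index-based
+  // iteration below makes the SetVector act as a simple worklist (insert
+  // appends new ids at the end and rejects duplicates).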
+  for (unsigned i = 0; i < DefQ.size(); ++i) {
+    auto TA = DFG.addr<DefNode*>(DefQ[i]);
+    if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
+      continue;
+    // Stop at the covering/overwriting def of the initial register reference.
+    RegisterRef RR = TA.Addr->getRegRef();
+    if (RAI.covers(RR, RefRR)) {
+      uint16_t Flags = TA.Addr->getFlags();
+      if (!(Flags & NodeAttrs::Preserving))
+        continue;
+    }
+    // Get the next level of reaching defs. This will include multiple
+    // reaching defs for shadows.
+    for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
+      if (auto RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+        DefQ.insert(RD);
+  }
+
+  // Remove all non-phi defs that are not aliased to RefRR, and collect
+  // the owners of the remaining defs.
+  SetVector<NodeId> Defs;
+  for (auto N : DefQ) {
+    auto TA = DFG.addr<DefNode*>(N);
+    bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
+    if (!IsPhi && !RAI.alias(RefRR, TA.Addr->getRegRef()))
+      continue;
+    Defs.insert(TA.Id);
+    Owners.insert(TA.Addr->getOwner(DFG).Id);
+  }
+
+  // Return the MachineBasicBlock containing a given instruction.
+  auto Block = [this] (NodeAddr<InstrNode*> IA) -> MachineBasicBlock* {
+    if (IA.Addr->getKind() == NodeAttrs::Stmt)
+      return NodeAddr<StmtNode*>(IA).Addr->getCode()->getParent();
+    assert(IA.Addr->getKind() == NodeAttrs::Phi);
+    NodeAddr<PhiNode*> PA = IA;
+    NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG);
+    return BA.Addr->getCode();
+  };
+  // Less(A,B) iff instruction A is further down in the dominator tree than B.
+  auto Less = [&Block,this] (NodeId A, NodeId B) -> bool {
+    if (A == B)
+      return false;
+    auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B);
+    MachineBasicBlock *BA = Block(OA), *BB = Block(OB);
+    if (BA != BB)
+      return MDT.dominates(BB, BA);
+    // They are in the same block.
+    bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
+    bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
+    if (StmtA) {
+      if (!StmtB)   // OB is a phi and phis dominate statements.
+        return true;
+      auto CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
+      auto CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+      // The order must be linear, so tie-break such equalities.
+      if (CA == CB)
+        return A < B;
+      return MDT.dominates(CB, CA);
+    } else {
+      // OA is a phi.
+      if (StmtB)
+        return false;
+      // Both are phis. There is no ordering between phis (in terms of
+      // the data-flow), so tie-break this via node id comparison.
+      return A < B;
+    }
+  };
+
+  std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
+  std::sort(Tmp.begin(), Tmp.end(), Less);
+
+  // The vector is a list of instructions, so that defs coming from
+  // the same instruction don't need to be artificially ordered.
+  // Then, when computing the initial segment, and iterating over an
+  // instruction, pick the defs that contribute to the covering (i.e. are
+  // not covered by previously added defs). Check the defs individually,
+  // i.e. first check whether each def is covered (without adding any to
+  // the tracking set), and then add all the selected ones.
+
+  // The reason for this is illustrated by this example:
+  //   *d1<A>, *d2<B>, ...  Assume A and B are aliased (can happen in phi nodes).
+  //   *d3<C>               If A \incl BuC, and B \incl AuC, then *d2 would be
+  //                        covered if we added A first, and A would be covered
+  //                        if we added B first.
+ + NodeList RDefs; + RegisterSet RRs = DefRRs; + + auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool { + return TA.Addr->getKind() == NodeAttrs::Def && + Defs.count(TA.Id); + }; + for (auto T : Tmp) { + if (!FullChain && RAI.covers(RRs, RefRR)) + break; + auto TA = DFG.addr<InstrNode*>(T); + bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA); + NodeList Ds; + for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) { + auto QR = DA.Addr->getRegRef(); + // Add phi defs even if they are covered by subsequent defs. This is + // for cases where the reached use is not covered by any of the defs + // encountered so far: the phi def is needed to expose the liveness + // of that use to the entry of the block. + // Example: + // phi d1<R3>(,d2,), ... Phi def d1 is covered by d2. + // d2<R3>(d1,,u3), ... + // ..., u3<D1>(d2) This use needs to be live on entry. + if (FullChain || IsPhi || !RAI.covers(RRs, QR)) + Ds.push_back(DA); + } + RDefs.insert(RDefs.end(), Ds.begin(), Ds.end()); + for (NodeAddr<DefNode*> DA : Ds) { + // When collecting a full chain of definitions, do not consider phi + // defs to actually define a register. + uint16_t Flags = DA.Addr->getFlags(); + if (!FullChain || !(Flags & NodeAttrs::PhiRef)) + if (!(Flags & NodeAttrs::Preserving)) + RRs.insert(DA.Addr->getRegRef()); + } + } + + return RDefs; +} + + +static const RegisterSet NoRegs; + +NodeList Liveness::getAllReachingDefs(NodeAddr<RefNode*> RefA) { + return getAllReachingDefs(RefA.Addr->getRegRef(), RefA, false, NoRegs); +} + + +void Liveness::computePhiInfo() { + NodeList Phis; + NodeAddr<FuncNode*> FA = DFG.getFunc(); + auto Blocks = FA.Addr->members(DFG); + for (NodeAddr<BlockNode*> BA : Blocks) { + auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG); + Phis.insert(Phis.end(), Ps.begin(), Ps.end()); + } + + // phi use -> (map: reaching phi -> set of registers defined in between) + std::map<NodeId,std::map<NodeId,RegisterSet>> PhiUp; + std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation. + + // Go over all phis. + for (NodeAddr<PhiNode*> PhiA : Phis) { + // Go over all defs and collect the reached uses that are non-phi uses + // (i.e. the "real uses"). + auto &RealUses = RealUseMap[PhiA.Id]; + auto PhiRefs = PhiA.Addr->members(DFG); + + // Have a work queue of defs whose reached uses need to be found. + // For each def, add to the queue all reached (non-phi) defs. + SetVector<NodeId> DefQ; + NodeSet PhiDefs; + for (auto R : PhiRefs) { + if (!DFG.IsRef<NodeAttrs::Def>(R)) + continue; + DefQ.insert(R.Id); + PhiDefs.insert(R.Id); + } + for (unsigned i = 0; i < DefQ.size(); ++i) { + NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]); + NodeId UN = DA.Addr->getReachedUse(); + while (UN != 0) { + NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN); + if (!(A.Addr->getFlags() & NodeAttrs::PhiRef)) + RealUses[getRestrictedRegRef(A)].insert(A.Id); + UN = A.Addr->getSibling(); + } + NodeId DN = DA.Addr->getReachedDef(); + while (DN != 0) { + NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN); + for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) { + uint16_t Flags = NodeAddr<DefNode*>(T).Addr->getFlags(); + // Must traverse the reached-def chain. Consider: + // def(D0) -> def(R0) -> def(R0) -> use(D0) + // The reachable use of D0 passes through a def of R0. + if (!(Flags & NodeAttrs::PhiRef)) + DefQ.insert(T.Id); + } + DN = A.Addr->getSibling(); + } + } + // Filter out these uses that appear to be reachable, but really + // are not. For example: + // + // R1:0 = d1 + // = R1:0 u2 Reached by d1. 
+    //   R0 = d3
+    //      = R1:0   u4   Still reached by d1: indirectly through
+    //                    the def d3.
+    //   R1 = d5
+    //      = R1:0   u6   Not reached by d1 (covered collectively
+    //                    by d3 and d5), but following reached
+    //                    defs and uses from d1 will lead here.
+    auto HasDef = [&PhiDefs] (NodeAddr<DefNode*> DA) -> bool {
+      return PhiDefs.count(DA.Id);
+    };
+    for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) {
+      // For each reached register UI->first, there is a set UI->second, of
+      // uses of it. For each such use, check if it is reached by this phi,
+      // i.e. check if the set of its reaching uses intersects the set of
+      // this phi's defs.
+      auto &Uses = UI->second;
+      for (auto I = Uses.begin(), E = Uses.end(); I != E; ) {
+        auto UA = DFG.addr<UseNode*>(*I);
+        NodeList RDs = getAllReachingDefs(UI->first, UA);
+        if (std::any_of(RDs.begin(), RDs.end(), HasDef))
+          ++I;
+        else
+          I = Uses.erase(I);
+      }
+      if (Uses.empty())
+        UI = RealUses.erase(UI);
+      else
+        ++UI;
+    }
+
+    // If this phi reaches some "real" uses, add it to the queue for upward
+    // propagation.
+    if (!RealUses.empty())
+      PhiUQ.push_back(PhiA.Id);
+
+    // Go over all phi uses and check if the reaching def is another phi.
+    // Collect the phis that are among the reaching defs of these uses.
+    // While traversing the list of reaching defs for each phi use, collect
+    // the set of registers defined between this phi (PhiA) and the owner phi
+    // of the reaching def.
+    for (auto I : PhiRefs) {
+      if (!DFG.IsRef<NodeAttrs::Use>(I))
+        continue;
+      NodeAddr<UseNode*> UA = I;
+      auto &UpMap = PhiUp[UA.Id];
+      RegisterSet DefRRs;
+      for (NodeAddr<DefNode*> DA : getAllReachingDefs(UA)) {
+        if (DA.Addr->getFlags() & NodeAttrs::PhiRef)
+          UpMap[DA.Addr->getOwner(DFG).Id] = DefRRs;
+        else
+          DefRRs.insert(DA.Addr->getRegRef());
+      }
+    }
+  }
+
+  if (Trace) {
+    dbgs() << "Phi-up-to-phi map:\n";
+    for (auto I : PhiUp) {
+      dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {";
+      for (auto R : I.second)
+        dbgs() << ' ' << Print<NodeId>(R.first, DFG)
+               << Print<RegisterSet>(R.second, DFG);
+      dbgs() << " }\n";
+    }
+  }
+
+  // Propagate the reached registers up in the phi chain.
+  //
+  // The following type of situation needs careful handling:
+  //
+  //   phi d1<R1:0>  (1)
+  //        |
+  //   ... d2<R1>
+  //        |
+  //   phi u3<R1:0>  (2)
+  //        |
+  //   ... u4<R1>
+  //
+  // The phi node (2) defines a register pair R1:0, and reaches a "real"
+  // use u4 of just R1. The same phi node is also known to reach (upwards)
+  // the phi node (1). However, the use u4 is not reached by phi (1),
+  // because of the intervening definition d2 of R1. The data flow between
+  // phis (1) and (2) is restricted to R1:0 minus R1, i.e. R0.
+  //
+  // When propagating uses up the phi chains, get all the reaching defs
+  // for a given phi use, and traverse the list until the propagated ref
+  // is covered, or until reaching the final phi. Only assume that the
+  // reference reaches the phi in the latter case.
+
+  for (unsigned i = 0; i < PhiUQ.size(); ++i) {
+    auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
+    auto &RealUses = RealUseMap[PA.Id];
+    for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
+      NodeAddr<UseNode*> UA = U;
+      auto &UpPhis = PhiUp[UA.Id];
+      for (auto UP : UpPhis) {
+        bool Changed = false;
+        auto &MidDefs = UP.second;
+        // Collect the set UpReached of uses that are reached by the current
+        // phi PA, and are not covered by any intervening def between PA and
+        // the upward phi UP.
+ RegisterSet UpReached; + for (auto T : RealUses) { + if (!isRestricted(PA, UA, T.first)) + continue; + if (!RAI.covers(MidDefs, T.first)) + UpReached.insert(T.first); + } + if (UpReached.empty()) + continue; + // Update the set PRUs of real uses reached by the upward phi UP with + // the actual set of uses (UpReached) that the UP phi reaches. + auto &PRUs = RealUseMap[UP.first]; + for (auto R : UpReached) { + unsigned Z = PRUs[R].size(); + PRUs[R].insert(RealUses[R].begin(), RealUses[R].end()); + Changed |= (PRUs[R].size() != Z); + } + if (Changed) + PhiUQ.push_back(UP.first); + } + } + } + + if (Trace) { + dbgs() << "Real use map:\n"; + for (auto I : RealUseMap) { + dbgs() << "phi " << Print<NodeId>(I.first, DFG); + NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first); + NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG); + if (!Ds.empty()) { + RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(); + dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>'; + } else { + dbgs() << "<noreg>"; + } + dbgs() << " -> " << Print<RefMap>(I.second, DFG) << '\n'; + } + } +} + + +void Liveness::computeLiveIns() { + // Populate the node-to-block map. This speeds up the calculations + // significantly. + NBMap.clear(); + for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) { + MachineBasicBlock *BB = BA.Addr->getCode(); + for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) { + for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) + NBMap.insert(std::make_pair(RA.Id, BB)); + NBMap.insert(std::make_pair(IA.Id, BB)); + } + } + + MachineFunction &MF = DFG.getMF(); + + // Compute IDF first, then the inverse. + decltype(IIDF) IDF; + for (auto &B : MF) { + auto F1 = MDF.find(&B); + if (F1 == MDF.end()) + continue; + SetVector<MachineBasicBlock*> IDFB(F1->second.begin(), F1->second.end()); + for (unsigned i = 0; i < IDFB.size(); ++i) { + auto F2 = MDF.find(IDFB[i]); + if (F2 != MDF.end()) + IDFB.insert(F2->second.begin(), F2->second.end()); + } + // Add B to the IDF(B). This will put B in the IIDF(B). + IDFB.insert(&B); + IDF[&B].insert(IDFB.begin(), IDFB.end()); + } + + for (auto I : IDF) + for (auto S : I.second) + IIDF[S].insert(I.first); + + computePhiInfo(); + + NodeAddr<FuncNode*> FA = DFG.getFunc(); + auto Blocks = FA.Addr->members(DFG); + + // Build the phi live-on-entry map. + for (NodeAddr<BlockNode*> BA : Blocks) { + MachineBasicBlock *MB = BA.Addr->getCode(); + auto &LON = PhiLON[MB]; + for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG)) + for (auto S : RealUseMap[P.Id]) + LON[S.first].insert(S.second.begin(), S.second.end()); + } + + if (Trace) { + dbgs() << "Phi live-on-entry map:\n"; + for (auto I : PhiLON) + dbgs() << "block #" << I.first->getNumber() << " -> " + << Print<RefMap>(I.second, DFG) << '\n'; + } + + // Build the phi live-on-exit map. Each phi node has some set of reached + // "real" uses. Propagate this set backwards into the block predecessors + // through the reaching defs of the corresponding phi uses. + for (NodeAddr<BlockNode*> BA : Blocks) { + auto Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG); + for (NodeAddr<PhiNode*> PA : Phis) { + auto &RUs = RealUseMap[PA.Id]; + if (RUs.empty()) + continue; + + for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) { + NodeAddr<PhiUseNode*> UA = U; + if (UA.Addr->getReachingDef() == 0) + continue; + + // Mark all reached "real" uses of P as live on exit in the + // predecessor. + // Remap all the RUs so that they have a correct reaching def. 
+ auto PrA = DFG.addr<BlockNode*>(UA.Addr->getPredecessor()); + auto &LOX = PhiLOX[PrA.Addr->getCode()]; + for (auto R : RUs) { + RegisterRef RR = R.first; + if (!isRestricted(PA, UA, RR)) + RR = getRestrictedRegRef(UA); + // The restricted ref may be different from the ref that was + // accessed in the "real use". This means that this phi use + // is not the one that carries this reference, so skip it. + if (!RAI.alias(R.first, RR)) + continue; + for (auto D : getAllReachingDefs(RR, UA)) + LOX[RR].insert(D.Id); + } + } // for U : phi uses + } // for P : Phis + } // for B : Blocks + + if (Trace) { + dbgs() << "Phi live-on-exit map:\n"; + for (auto I : PhiLOX) + dbgs() << "block #" << I.first->getNumber() << " -> " + << Print<RefMap>(I.second, DFG) << '\n'; + } + + RefMap LiveIn; + traverse(&MF.front(), LiveIn); + + // Add function live-ins to the live-in set of the function entry block. + auto &EntryIn = LiveMap[&MF.front()]; + for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) + EntryIn.insert({I->first,0}); + + if (Trace) { + // Dump the liveness map + for (auto &B : MF) { + BitVector LV(TRI.getNumRegs()); + for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) + LV.set(I->PhysReg); + dbgs() << "BB#" << B.getNumber() << "\t rec = {"; + for (int x = LV.find_first(); x >= 0; x = LV.find_next(x)) + dbgs() << ' ' << Print<RegisterRef>({unsigned(x),0}, DFG); + dbgs() << " }\n"; + dbgs() << "\tcomp = " << Print<RegisterSet>(LiveMap[&B], DFG) << '\n'; + } + } +} + + +void Liveness::resetLiveIns() { + for (auto &B : DFG.getMF()) { + // Remove all live-ins. + std::vector<unsigned> T; + for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) + T.push_back(I->PhysReg); + for (auto I : T) + B.removeLiveIn(I); + // Add the newly computed live-ins. + auto &LiveIns = LiveMap[&B]; + for (auto I : LiveIns) { + assert(I.Sub == 0); + B.addLiveIn(I.Reg); + } + } +} + + +void Liveness::resetKills() { + for (auto &B : DFG.getMF()) + resetKills(&B); +} + + +void Liveness::resetKills(MachineBasicBlock *B) { + auto CopyLiveIns = [] (MachineBasicBlock *B, BitVector &LV) -> void { + for (auto I = B->livein_begin(), E = B->livein_end(); I != E; ++I) + LV.set(I->PhysReg); + }; + + BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs()); + CopyLiveIns(B, LiveIn); + for (auto SI : B->successors()) + CopyLiveIns(SI, Live); + + for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) { + MachineInstr *MI = &*I; + if (MI->isDebugValue()) + continue; + + MI->clearKillInfo(); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(R)) + continue; + for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) + Live.reset(*SR); + } + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(R)) + continue; + bool IsLive = false; + for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) { + if (!Live[*SR]) + continue; + IsLive = true; + break; + } + if (IsLive) + continue; + Op.setIsKill(true); + for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) + Live.set(*SR); + } + } +} + + +// For shadows, determine if RR is aliased to a reaching def of any other +// shadow associated with RA. If it is not, then RR is "restricted" to RA, +// and so it can be considered a value specific to RA. This is important +// for accurately determining values associated with phi uses. 
+// For non-shadows, this function returns "true". +bool Liveness::isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, + RegisterRef RR) const { + NodeId Start = RA.Id; + for (NodeAddr<RefNode*> TA = DFG.getNextShadow(IA, RA); + TA.Id != 0 && TA.Id != Start; TA = DFG.getNextShadow(IA, TA)) { + NodeId RD = TA.Addr->getReachingDef(); + if (RD == 0) + continue; + if (RAI.alias(RR, DFG.addr<DefNode*>(RD).Addr->getRegRef())) + return false; + } + return true; +} + + +RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const { + assert(DFG.IsRef<NodeAttrs::Use>(RA)); + if (RA.Addr->getFlags() & NodeAttrs::Shadow) { + NodeId RD = RA.Addr->getReachingDef(); + assert(RD); + RA = DFG.addr<DefNode*>(RD); + } + return RA.Addr->getRegRef(); +} + + +unsigned Liveness::getPhysReg(RegisterRef RR) const { + if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg)) + return 0; + return RR.Sub ? TRI.getSubReg(RR.Reg, RR.Sub) : RR.Reg; +} + + +// Helper function to obtain the basic block containing the reaching def +// of the given use. +MachineBasicBlock *Liveness::getBlockWithRef(NodeId RN) const { + auto F = NBMap.find(RN); + if (F != NBMap.end()) + return F->second; + llvm_unreachable("Node id not in map"); +} + + +void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { + // The LiveIn map, for each (physical) register, contains the set of live + // reaching defs of that register that are live on entry to the associated + // block. + + // The summary of the traversal algorithm: + // + // R is live-in in B, if there exists a U(R), such that rdef(R) dom B + // and (U \in IDF(B) or B dom U). + // + // for (C : children) { + // LU = {} + // traverse(C, LU) + // LiveUses += LU + // } + // + // LiveUses -= Defs(B); + // LiveUses += UpwardExposedUses(B); + // for (C : IIDF[B]) + // for (U : LiveUses) + // if (Rdef(U) dom C) + // C.addLiveIn(U) + // + + // Go up the dominator tree (depth-first). + MachineDomTreeNode *N = MDT.getNode(B); + for (auto I : *N) { + RefMap L; + MachineBasicBlock *SB = I->getBlock(); + traverse(SB, L); + + for (auto S : L) + LiveIn[S.first].insert(S.second.begin(), S.second.end()); + } + + if (Trace) { + dbgs() << LLVM_FUNCTION_NAME << " in BB#" << B->getNumber() + << " after recursion into"; + for (auto I : *N) + dbgs() << ' ' << I->getBlock()->getNumber(); + dbgs() << "\n LiveIn: " << Print<RefMap>(LiveIn, DFG); + dbgs() << "\n Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + } + + // Add phi uses that are live on exit from this block. + RefMap &PUs = PhiLOX[B]; + for (auto S : PUs) + LiveIn[S.first].insert(S.second.begin(), S.second.end()); + + if (Trace) { + dbgs() << "after LOX\n"; + dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + } + + // Stop tracking all uses defined in this block: erase those records + // where the reaching def is located in B and which cover all reached + // uses. + auto Copy = LiveIn; + LiveIn.clear(); + + for (auto I : Copy) { + auto &Defs = LiveIn[I.first]; + NodeSet Rest; + for (auto R : I.second) { + auto DA = DFG.addr<DefNode*>(R); + RegisterRef DDR = DA.Addr->getRegRef(); + NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG); + NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG); + // Defs from a different block need to be preserved. Defs from this + // block will need to be processed further, except for phi defs, the + // liveness of which is handled through the PhiLON/PhiLOX maps. 
+ if (B != BA.Addr->getCode()) + Defs.insert(R); + else { + bool IsPreserving = DA.Addr->getFlags() & NodeAttrs::Preserving; + if (IA.Addr->getKind() != NodeAttrs::Phi && !IsPreserving) { + bool Covering = RAI.covers(DDR, I.first); + NodeId U = DA.Addr->getReachedUse(); + while (U && Covering) { + auto DUA = DFG.addr<UseNode*>(U); + RegisterRef Q = DUA.Addr->getRegRef(); + Covering = RAI.covers(DA.Addr->getRegRef(), Q); + U = DUA.Addr->getSibling(); + } + if (!Covering) + Rest.insert(R); + } + } + } + + // Non-covering defs from B. + for (auto R : Rest) { + auto DA = DFG.addr<DefNode*>(R); + RegisterRef DRR = DA.Addr->getRegRef(); + RegisterSet RRs; + for (NodeAddr<DefNode*> TA : getAllReachingDefs(DA)) { + NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG); + NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG); + // Preserving defs do not count towards covering. + if (!(TA.Addr->getFlags() & NodeAttrs::Preserving)) + RRs.insert(TA.Addr->getRegRef()); + if (BA.Addr->getCode() == B) + continue; + if (RAI.covers(RRs, DRR)) + break; + Defs.insert(TA.Id); + } + } + } + + emptify(LiveIn); + + if (Trace) { + dbgs() << "after defs in block\n"; + dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + } + + // Scan the block for upward-exposed uses and add them to the tracking set. + for (auto I : DFG.getFunc().Addr->findBlock(B, DFG).Addr->members(DFG)) { + NodeAddr<InstrNode*> IA = I; + if (IA.Addr->getKind() != NodeAttrs::Stmt) + continue; + for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) { + RegisterRef RR = UA.Addr->getRegRef(); + for (auto D : getAllReachingDefs(UA)) + if (getBlockWithRef(D.Id) != B) + LiveIn[RR].insert(D.Id); + } + } + + if (Trace) { + dbgs() << "after uses in block\n"; + dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n'; + } + + // Phi uses should not be propagated up the dominator tree, since they + // are not dominated by their corresponding reaching defs. + auto &Local = LiveMap[B]; + auto &LON = PhiLON[B]; + for (auto R : LON) + Local.insert(R.first); + + if (Trace) { + dbgs() << "after phi uses in block\n"; + dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; + dbgs() << " Local: " << Print<RegisterSet>(Local, DFG) << '\n'; + } + + for (auto C : IIDF[B]) { + auto &LiveC = LiveMap[C]; + for (auto S : LiveIn) + for (auto R : S.second) + if (MDT.properlyDominates(getBlockWithRef(R), C)) + LiveC.insert(S.first); + } +} + + +void Liveness::emptify(RefMap &M) { + for (auto I = M.begin(), E = M.end(); I != E; ) + I = I->second.empty() ? M.erase(I) : std::next(I); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h new file mode 100644 index 0000000..4c1e8f3 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h @@ -0,0 +1,106 @@ +//===--- RDFLiveness.h ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Recalculate the liveness information given a data flow graph. +// This includes block live-ins and kill flags. 
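+//
+// A minimal usage sketch (the driver function below is hypothetical, but
+// the constructors and methods match the declarations in this header and
+// in RDFGraph.h):
+//
+//   void recomputeLiveness(MachineFunction &MF, const TargetInstrInfo &TII,
+//         const TargetRegisterInfo &TRI, const MachineDominatorTree &MDT,
+//         const MachineDominanceFrontier &MDF) {
+//     RegisterAliasInfo RAI(TRI);      // default alias/covering queries
+//     TargetOperandInfo TOI(TII);      // default operand classification
+//     DataFlowGraph G(MF, TII, TRI, MDT, MDF, RAI, TOI);
+//     G.build();                       // construct the def-use graph
+//     Liveness LV(MF.getRegInfo(), G);
+//     LV.computeLiveIns();             // compute per-block live-in sets
+//     LV.resetLiveIns();               // rewrite MBB live-in lists
+//     LV.resetKills();                 // recompute kill flags
+//   }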
+ +#ifndef RDF_LIVENESS_H +#define RDF_LIVENESS_H + +#include "RDFGraph.h" +#include "llvm/ADT/DenseMap.h" +#include <map> + +using namespace llvm; + +namespace llvm { + class MachineBasicBlock; + class MachineFunction; + class MachineRegisterInfo; + class TargetRegisterInfo; + class MachineDominatorTree; + class MachineDominanceFrontier; +} + +namespace rdf { + struct Liveness { + public: + typedef std::map<MachineBasicBlock*,RegisterSet> LiveMapType; + typedef std::map<RegisterRef,NodeSet> RefMap; + + Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g) + : DFG(g), TRI(g.getTRI()), MDT(g.getDT()), MDF(g.getDF()), + RAI(g.getRAI()), MRI(mri), Empty(), Trace(false) {} + + NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA, + bool FullChain = false, const RegisterSet &DefRRs = RegisterSet()); + NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA); + + LiveMapType &getLiveMap() { return LiveMap; } + const LiveMapType &getLiveMap() const { return LiveMap; } + const RefMap &getRealUses(NodeId P) const { + auto F = RealUseMap.find(P); + return F == RealUseMap.end() ? Empty : F->second; + } + + void computePhiInfo(); + void computeLiveIns(); + void resetLiveIns(); + void resetKills(); + void resetKills(MachineBasicBlock *B); + + void trace(bool T) { Trace = T; } + + private: + const DataFlowGraph &DFG; + const TargetRegisterInfo &TRI; + const MachineDominatorTree &MDT; + const MachineDominanceFrontier &MDF; + const RegisterAliasInfo &RAI; + MachineRegisterInfo &MRI; + LiveMapType LiveMap; + const RefMap Empty; + bool Trace; + + // Cache of mapping from node ids (for RefNodes) to the containing + // basic blocks. Not computing it each time for each node reduces + // the liveness calculation time by a large fraction. + typedef DenseMap<NodeId,MachineBasicBlock*> NodeBlockMap; + NodeBlockMap NBMap; + + // Phi information: + // + // map: NodeId -> (map: RegisterRef -> NodeSet) + // phi id -> (map: register -> set of reached non-phi uses) + std::map<NodeId, RefMap> RealUseMap; + + // Inverse iterated dominance frontier. + std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF; + + // Live on entry. + std::map<MachineBasicBlock*,RefMap> PhiLON; + + // Phi uses are considered to be located at the end of the block that + // they are associated with. The reaching def of a phi use dominates the + // block that the use corresponds to, but not the block that contains + // the phi itself. To include these uses in the liveness propagation (up + // the dominator tree), create a map: block -> set of uses live on exit. 
+ std::map<MachineBasicBlock*,RefMap> PhiLOX; + + bool isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, + RegisterRef RR) const; + RegisterRef getRestrictedRegRef(NodeAddr<RefNode*> RA) const; + unsigned getPhysReg(RegisterRef RR) const; + MachineBasicBlock *getBlockWithRef(NodeId RN) const; + void traverse(MachineBasicBlock *B, RefMap &LiveIn); + void emptify(RefMap &M); + }; +} + +#endif // RDF_LIVENESS_H diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index 6756c17..5680130 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -277,8 +277,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); @@ -327,6 +325,8 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); @@ -872,7 +872,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return lowerJumpTable(Op, DAG); case ISD::SELECT: return lowerSELECT(Op, DAG); - case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); case ISD::SETCC: return lowerSETCC(Op, DAG); case ISD::VASTART: return lowerVASTART(Op, DAG); case ISD::VAARG: return lowerVAARG(Op, DAG); @@ -1648,20 +1647,6 @@ lowerSELECT(SDValue Op, SelectionDAG &DAG) const SDLoc(Op)); } -SDValue MipsTargetLowering:: -lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const -{ - SDLoc DL(Op); - EVT Ty = Op.getOperand(0).getValueType(); - SDValue Cond = - DAG.getNode(ISD::SETCC, DL, getSetCCResultType(DAG.getDataLayout(), - *DAG.getContext(), Ty), - Op.getOperand(0), Op.getOperand(1), Op.getOperand(4)); - - return DAG.getNode(ISD::SELECT, DL, Op.getValueType(), Cond, Op.getOperand(2), - Op.getOperand(3)); -} - SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6()); SDValue Cond = createFPCmp(DAG, Op); diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h index b33e125..0dc683e 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h @@ -430,7 +430,6 @@ namespace llvm { SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; diff --git 
a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td index d9fb8c8..ffda491 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -1003,7 +1003,7 @@ class IndirectBranch<string opstr, RegisterOperand RO> : JumpFR<opstr, RO> { let isCall=1, hasDelaySlot=1, Defs = [RA] in { class JumpLink<string opstr, DAGOperand opnd> : InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), - [(MipsJmpLink imm:$target)], II_JAL, FrmJ, opstr> { + [(MipsJmpLink tglobaladdr:$target)], II_JAL, FrmJ, opstr> { let DecoderMethod = "DecodeJumpTarget"; } @@ -2075,8 +2075,6 @@ def : MipsPat<(MipsSync (i32 immz)), (SYNC 0)>, ISA_MIPS2; // Call -def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)), - (JAL tglobaladdr:$dst)>; def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)), (JAL texternalsym:$dst)>; //def : MipsPat<(MipsJmpLink GPR32:$dst), diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index e6f7fe9..d4aeaf9 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -544,8 +544,6 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB, const MachineOperand &SrcLo = I->getOperand(1), &SrcHi = I->getOperand(2); MachineInstrBuilder LoInst = BuildMI(MBB, I, DL, get(LoOpc)); MachineInstrBuilder HiInst = BuildMI(MBB, I, DL, get(HiOpc)); - LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill())); - HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill())); // Add lo/hi registers if the mtlo/hi instructions created have explicit // def registers. @@ -556,6 +554,9 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB, LoInst.addReg(DstLo, RegState::Define); HiInst.addReg(DstHi, RegState::Define); } + + LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill())); + HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill())); } void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 7663696..be735f6 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -4549,6 +4549,7 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { delete static_cast<NVPTXSection *>(DwarfLocSection); delete static_cast<NVPTXSection *>(DwarfARangesSection); delete static_cast<NVPTXSection *>(DwarfRangesSection); + delete static_cast<NVPTXSection *>(DwarfMacinfoSection); } MCSection * diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 0f88ddf..683b9a3 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -41,6 +41,7 @@ public: DwarfLocSection = nullptr; DwarfARangesSection = nullptr; DwarfRangesSection = nullptr; + DwarfMacinfoSection = nullptr; } virtual ~NVPTXTargetObjectFile(); @@ -81,6 +82,8 @@ public: new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfMacinfoSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 
index 9a63c14..ec354c2 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1092,8 +1092,28 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { } // ELFv2 ABI - Normal entry label. - if (Subtarget->isELFv2ABI()) + if (Subtarget->isELFv2ABI()) { + // In the Large code model, we allow arbitrary displacements between + // the text section and its associated TOC section. We place the + // full 8-byte offset to the TOC in memory immediately preceding + // the function global entry point. + if (TM.getCodeModel() == CodeModel::Large + && !MF->getRegInfo().use_empty(PPC::X2)) { + const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>(); + + MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC.")); + MCSymbol *GlobalEPSymbol = PPCFI->getGlobalEPSymbol(); + const MCExpr *TOCDeltaExpr = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext), + MCSymbolRefExpr::create(GlobalEPSymbol, + OutContext), + OutContext); + + OutStreamer->EmitLabel(PPCFI->getTOCOffsetSymbol()); + OutStreamer->EmitValue(TOCDeltaExpr, 8); + } return AsmPrinter::EmitFunctionEntryLabel(); + } // Emit an official procedure descriptor. MCSectionSubPair Current = OutStreamer->getCurrentSection(); @@ -1160,10 +1180,25 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { // thus emit a prefix sequence along the following lines: // // func: + // .Lfunc_gepNN: + // # global entry point + // addis r2,r12,(.TOC.-.Lfunc_gepNN)@ha + // addi r2,r2,(.TOC.-.Lfunc_gepNN)@l + // .Lfunc_lepNN: + // .localentry func, .Lfunc_lepNN-.Lfunc_gepNN + // # local entry point, followed by function body + // + // For the Large code model, we create + // + // .Lfunc_tocNN: + // .quad .TOC.-.Lfunc_gepNN # done by EmitFunctionEntryLabel + // func: + // .Lfunc_gepNN: // # global entry point - // addis r2,r12,(.TOC.-func)@ha - // addi r2,r2,(.TOC.-func)@l - // .localentry func, .-func + // ld r2,.Lfunc_tocNN-.Lfunc_gepNN(r12) + // add r2,r2,r12 + // .Lfunc_lepNN: + // .localentry func, .Lfunc_lepNN-.Lfunc_gepNN // # local entry point, followed by function body // // This ensures we have r2 set up correctly while executing the function @@ -1171,32 +1206,49 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { if (Subtarget->isELFv2ABI() // Only do all that if the function uses r2 in the first place.
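// (PPC::X2 is r2, the TOC pointer register in the ELFv2 ABI.)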
&& !MF->getRegInfo().use_empty(PPC::X2)) { + const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>(); - MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol(); + MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol(); OutStreamer->EmitLabel(GlobalEntryLabel); const MCSymbolRefExpr *GlobalEntryLabelExp = MCSymbolRefExpr::create(GlobalEntryLabel, OutContext); - MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC.")); - const MCExpr *TOCDeltaExpr = - MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext), - GlobalEntryLabelExp, OutContext); + if (TM.getCodeModel() != CodeModel::Large) { + MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC.")); + const MCExpr *TOCDeltaExpr = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext), + GlobalEntryLabelExp, OutContext); - const MCExpr *TOCDeltaHi = - PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) - .addReg(PPC::X2) - .addReg(PPC::X12) - .addExpr(TOCDeltaHi)); - - const MCExpr *TOCDeltaLo = - PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI) - .addReg(PPC::X2) - .addReg(PPC::X2) - .addExpr(TOCDeltaLo)); - - MCSymbol *LocalEntryLabel = OutContext.createTempSymbol(); + const MCExpr *TOCDeltaHi = + PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) + .addReg(PPC::X2) + .addReg(PPC::X12) + .addExpr(TOCDeltaHi)); + + const MCExpr *TOCDeltaLo = + PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI) + .addReg(PPC::X2) + .addReg(PPC::X2) + .addExpr(TOCDeltaLo)); + } else { + MCSymbol *TOCOffset = PPCFI->getTOCOffsetSymbol(); + const MCExpr *TOCOffsetDeltaExpr = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCOffset, OutContext), + GlobalEntryLabelExp, OutContext); + + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD) + .addReg(PPC::X2) + .addExpr(TOCOffsetDeltaExpr) + .addReg(PPC::X12)); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADD8) + .addReg(PPC::X2) + .addReg(PPC::X2) + .addReg(PPC::X12)); + } + + MCSymbol *LocalEntryLabel = PPCFI->getLocalEPSymbol(); OutStreamer->EmitLabel(LocalEntryLabel); const MCSymbolRefExpr *LocalEntryLabelExp = MCSymbolRefExpr::create(LocalEntryLabel, OutContext); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 075e093..79e4fe3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -299,22 +299,35 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), // 64-bit CR instructions let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let hasSideEffects = 0 in { +// mtocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraDefRegAllocReq = 1 in { def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST), "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; +// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that +// is dependent on the cr fields being set. def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS), "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraDefRegAllocReq = 1 -let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. 
+// mfocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraSrcRegAllocReq = 1 in { def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; +// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that +// is dependent on the cr fields being copied. def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraSrcRegAllocReq = 1 } // hasSideEffects = 0 let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index c17603a..dcff6ad 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -744,20 +744,43 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, "isel is for regular integer GPRs only"); unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL; - unsigned SelectPred = Cond[0].getImm(); + auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm()); unsigned SubIdx; bool SwapOps; switch (SelectPred) { - default: llvm_unreachable("invalid predicate for isel"); - case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break; - case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break; - case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break; - case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break; - case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break; - case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; - case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; - case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; + case PPC::PRED_EQ: + case PPC::PRED_EQ_MINUS: + case PPC::PRED_EQ_PLUS: + SubIdx = PPC::sub_eq; SwapOps = false; break; + case PPC::PRED_NE: + case PPC::PRED_NE_MINUS: + case PPC::PRED_NE_PLUS: + SubIdx = PPC::sub_eq; SwapOps = true; break; + case PPC::PRED_LT: + case PPC::PRED_LT_MINUS: + case PPC::PRED_LT_PLUS: + SubIdx = PPC::sub_lt; SwapOps = false; break; + case PPC::PRED_GE: + case PPC::PRED_GE_MINUS: + case PPC::PRED_GE_PLUS: + SubIdx = PPC::sub_lt; SwapOps = true; break; + case PPC::PRED_GT: + case PPC::PRED_GT_MINUS: + case PPC::PRED_GT_PLUS: + SubIdx = PPC::sub_gt; SwapOps = false; break; + case PPC::PRED_LE: + case PPC::PRED_LE_MINUS: + case PPC::PRED_LE_PLUS: + SubIdx = PPC::sub_gt; SwapOps = true; break; + case PPC::PRED_UN: + case PPC::PRED_UN_MINUS: + case PPC::PRED_UN_PLUS: + SubIdx = PPC::sub_un; SwapOps = false; break; + case PPC::PRED_NU: + case PPC::PRED_NU_MINUS: + case PPC::PRED_NU_PLUS: + SubIdx = PPC::sub_un; SwapOps = true; break; case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break; case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 6c4364a..ce0f9e6 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -2299,22 +2299,35 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), "#RESTORE_VRSAVE", []>; let hasSideEffects = 0 in { +// mtocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. 
Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraDefRegAllocReq = 1 in { def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST), "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; +// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that +// is dependent on the cr fields being set. def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS), "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraDefRegAllocReq = 1 -let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. +// mfocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraSrcRegAllocReq = 1 in { def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; +// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that +// is dependent on the cr fields being copied. def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraSrcRegAllocReq = 1 } // hasSideEffects = 0 // Pseudo instruction to perform FADD in round-to-zero mode. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index 95f1631..9d91e31 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -23,3 +23,24 @@ MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const { Twine(MF.getFunctionNumber()) + "$poff"); } + +MCSymbol *PPCFunctionInfo::getGlobalEPSymbol() const { + const DataLayout &DL = MF.getDataLayout(); + return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "func_gep" + + Twine(MF.getFunctionNumber())); +} + +MCSymbol *PPCFunctionInfo::getLocalEPSymbol() const { + const DataLayout &DL = MF.getDataLayout(); + return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "func_lep" + + Twine(MF.getFunctionNumber())); +} + +MCSymbol *PPCFunctionInfo::getTOCOffsetSymbol() const { + const DataLayout &DL = MF.getDataLayout(); + return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "func_toc" + + Twine(MF.getFunctionNumber())); +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 607cdf6..10a8ce0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -197,6 +197,10 @@ public: bool usesPICBase() const { return UsesPICBase; } MCSymbol *getPICOffsetSymbol() const; + + MCSymbol *getGlobalEPSymbol() const; + MCSymbol *getLocalEPSymbol() const; + MCSymbol *getTOCOffsetSymbol() const; }; } // end of namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index 2dc0d82..a9d2e88 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -99,6 +99,11 @@ protected: break; } + // Don't really need to save data to the stack - the clobbered + // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr) + // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR). 
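+ // ADJCALLSTACKDOWN/ADJCALLSTACKUP bracket the expanded call sequence so that + // frame lowering can still recognize it as a call site; the zero immediates + // mean no additional stack space is reserved for it.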
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0); + // Expand into two ops built prior to the existing instruction. MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3) .addReg(InReg); @@ -113,6 +118,8 @@ protected: .addReg(GPR3)); Call->addOperand(MI->getOperand(3)); + BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); + BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg) .addReg(GPR3); diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index 733027a..05006ac 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -83,7 +83,6 @@ static bool IsIntegerCC(unsigned CC) return (CC <= SPCC::ICC_VC); } - static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) { switch(CC) { @@ -124,106 +123,103 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) llvm_unreachable("Invalid cond code"); } -bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const -{ - - MachineBasicBlock::iterator I = MBB.end(); - MachineBasicBlock::iterator UnCondBrIter = MBB.end(); - while (I != MBB.begin()) { - --I; +static bool isUncondBranchOpcode(int Opc) { return Opc == SP::BA; } - if (I->isDebugValue()) - continue; +static bool isCondBranchOpcode(int Opc) { + return Opc == SP::FBCOND || Opc == SP::BCOND; +} - // When we see a non-terminator, we are done. - if (!isUnpredicatedTerminator(I)) - break; +static bool isIndirectBranchOpcode(int Opc) { + return Opc == SP::BINDrr || Opc == SP::BINDri; +} - // Terminator is not a branch. - if (!I->isBranch()) - return true; +static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, + SmallVectorImpl<MachineOperand> &Cond) { + Cond.push_back(MachineOperand::CreateImm(LastInst->getOperand(1).getImm())); + Target = LastInst->getOperand(0).getMBB(); +} - // Handle Unconditional branches. - if (I->getOpcode() == SP::BA) { - UnCondBrIter = I; +bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return false; + + if (!isUnpredicatedTerminator(I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + unsigned LastOpc = LastInst->getOpcode(); + + // If there is only one terminator instruction, process it. + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (isUncondBranchOpcode(LastOpc)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (isCondBranchOpcode(LastOpc)) { + // Block ends with fall-through condbranch. + parseCondBranch(LastInst, TBB, Cond); + return false; + } + return true; // Can't handle indirect branch. + } - if (!AllowModify) { - TBB = I->getOperand(0).getMBB(); - continue; + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If AllowModify is true and the block ends with two or more unconditional + // branches, delete all but the first unconditional branch. 
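+ // For example, "ba L1; ba L2" becomes just "ba L1", since the second branch + // can never be executed.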
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) { + while (isUncondBranchOpcode(SecondLastOpc)) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + // Return now; the only terminator is an unconditional branch. + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + SecondLastInst = I; + SecondLastOpc = SecondLastInst->getOpcode(); } + } + } - while (std::next(I) != MBB.end()) - std::next(I)->eraseFromParent(); - - Cond.clear(); - FBB = nullptr; + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; - if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - TBB = nullptr; - I->eraseFromParent(); - I = MBB.end(); - UnCondBrIter = MBB.end(); - continue; - } + // If the block ends with a B and a Bcc, handle it. + if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + parseCondBranch(SecondLastInst, TBB, Cond); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } - TBB = I->getOperand(0).getMBB(); - continue; - } + // If the block ends with two unconditional branches, handle it. The second + // one is not executed. + if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + return false; + } - unsigned Opcode = I->getOpcode(); - if (Opcode != SP::BCOND && Opcode != SP::FBCOND) - return true; // Unknown Opcode. - - SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm(); - - if (Cond.empty()) { - MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); - if (AllowModify && UnCondBrIter != MBB.end() && - MBB.isLayoutSuccessor(TargetBB)) { - - // Transform the code - // - // brCC L1 - // ba L2 - // L1: - // .. - // L2: - // - // into - // - // brnCC L2 - // L1: - // ... - // L2: - // - BranchCode = GetOppositeBranchCondition(BranchCode); - MachineBasicBlock::iterator OldInst = I; - BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode)) - .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode); - BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA)) - .addMBB(TargetBB); - - OldInst->eraseFromParent(); - UnCondBrIter->eraseFromParent(); - - UnCondBrIter = MBB.end(); - I = MBB.end(); - continue; - } - FBB = TBB; - TBB = I->getOperand(0).getMBB(); - Cond.push_back(MachineOperand::CreateImm(BranchCode)); - continue; - } - // FIXME: Handle subsequent conditional branches. - // For now, we can't handle multiple conditional branches. + // ...likewise if it ends with an indirect branch followed by an unconditional + // branch. + if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); return true; } - return false; + + // Otherwise, can't handle this.
+ return true; } unsigned @@ -277,6 +273,14 @@ unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const return Count; } +bool SparcInstrInfo::ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 1); + SPCC::CondCodes CC = static_cast<SPCC::CondCodes>(Cond[0].getImm()); + Cond[0].setImm(GetOppositeBranchCondition(CC)); + return false; +} + void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h index 15673f1..9de624c 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h @@ -76,6 +76,9 @@ public: MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, DebugLoc DL) const override; + bool + ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index b9f2eb5..d5dabc2 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1219,6 +1219,9 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; // Atomic operations //===----------------------------------------------------------------------===// +// A serialization instruction that acts as a barrier for all memory +// accesses, which expands to "bcr 14, 0". +let hasSideEffects = 1 in def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>; let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt new file mode 100644 index 0000000..5e55e29 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMWebAssemblyDisassembler + WebAssemblyDisassembler.cpp + ) diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt b/contrib/llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt new file mode 100644 index 0000000..a452ca1 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===-- ./lib/Target/WebAssembly/Disassembler/LLVMBuild.txt -----*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = WebAssemblyDisassembler +parent = WebAssembly +required_libraries = MCDisassembler WebAssemblyInfo Support +add_to_library_groups = WebAssembly diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/Makefile b/contrib/llvm/lib/Target/WebAssembly/Disassembler/Makefile new file mode 100644 index 0000000..bcd36ba --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===-- lib/Target/WebAssembly/Disassembler/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMWebAssemblyDisassembler + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp new file mode 100644 index 0000000..0143b10 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -0,0 +1,148 @@ +//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file is part of the WebAssembly Disassembler. +/// +/// It contains code to translate the data produced by the decoder into +/// MCInsts. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-disassembler" + +namespace { +class WebAssemblyDisassembler final : public MCDisassembler { + std::unique_ptr<const MCInstrInfo> MCII; + + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; + +public: + WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + std::unique_ptr<const MCInstrInfo> MCII) + : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} +}; +} // end anonymous namespace + +static MCDisassembler *createWebAssemblyDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); + return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); +} + +extern "C" void LLVMInitializeWebAssemblyDisassembler() { + // Register the disassembler for each target. 
+ TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget32, + createWebAssemblyDisassembler); + TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget64, + createWebAssemblyDisassembler); +} + +MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( + MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, + raw_ostream &OS, raw_ostream &CS) const { + Size = 0; + uint64_t Pos = 0; + + // Read the opcode. + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + uint64_t Opcode = support::endian::read64le(Bytes.data() + Pos); + Pos += sizeof(uint64_t); + + if (Opcode >= WebAssembly::INSTRUCTION_LIST_END) + return MCDisassembler::Fail; + + MI.setOpcode(Opcode); + const MCInstrDesc &Desc = MCII->get(Opcode); + unsigned NumFixedOperands = Desc.NumOperands; + + // If it's variadic, read the number of extra operands. + unsigned NumExtraOperands = 0; + if (Desc.isVariadic()) { + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + NumExtraOperands = support::endian::read64le(Bytes.data() + Pos); + Pos += sizeof(uint64_t); + } + + // Read the fixed operands. These are described by the MCInstrDesc. + for (unsigned i = 0; i < NumFixedOperands; ++i) { + const MCOperandInfo &Info = Desc.OpInfo[i]; + switch (Info.OperandType) { + case MCOI::OPERAND_IMMEDIATE: + case WebAssembly::OPERAND_BASIC_BLOCK: { + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + uint64_t Imm = support::endian::read64le(Bytes.data() + Pos); + Pos += sizeof(uint64_t); + MI.addOperand(MCOperand::createImm(Imm)); + break; + } + case MCOI::OPERAND_REGISTER: { + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + uint64_t Reg = support::endian::read64le(Bytes.data() + Pos); + Pos += sizeof(uint64_t); + MI.addOperand(MCOperand::createReg(Reg)); + break; + } + case WebAssembly::OPERAND_FPIMM: { + // TODO: MC converts all floating point immediate operands to double. + // This is fine for numeric values, but may cause NaNs to change bits. + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + uint64_t Bits = support::endian::read64le(Bytes.data() + Pos); + Pos += sizeof(uint64_t); + double Imm; + memcpy(&Imm, &Bits, sizeof(Imm)); + MI.addOperand(MCOperand::createFPImm(Imm)); + break; + } + default: + llvm_unreachable("unimplemented operand kind"); + } + } + + // Read the extra operands. + assert(NumExtraOperands == 0 || Desc.isVariadic()); + for (unsigned i = 0; i < NumExtraOperands; ++i) { + if (Pos + sizeof(uint64_t) > Bytes.size()) + return MCDisassembler::Fail; + if (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate) { + // Decode extra immediate operands. + uint64_t Imm = support::endian::read64le(Bytes.data() + Pos); + MI.addOperand(MCOperand::createImm(Imm)); + } else { + // Decode extra register operands. 
+ uint64_t Reg = support::endian::read64le(Bytes.data() + Pos); + MI.addOperand(MCOperand::createReg(Reg)); + } + Pos += sizeof(uint64_t); + } + + Size = Pos; + return MCDisassembler::Success; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index 7ce3a00..9a95150 100644 --- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -16,6 +16,8 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -33,7 +35,7 @@ using namespace llvm; WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MII, MRI) {} + : MCInstPrinter(MAI, MII, MRI), ControlFlowCounter(0) {} void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { @@ -59,6 +61,52 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, // Print any added annotation. printAnnotation(OS, Annot); + + if (CommentStream) { + // Observe any effects on the control flow stack, for use in annotating + // control flow label references. + switch (MI->getOpcode()) { + default: + break; + case WebAssembly::LOOP: { + // Grab the TopLabel value first so that labels print in numeric order. + uint64_t TopLabel = ControlFlowCounter++; + ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false)); + printAnnotation(OS, "label" + utostr(TopLabel) + ':'); + ControlFlowStack.push_back(std::make_pair(TopLabel, true)); + break; + } + case WebAssembly::BLOCK: + ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false)); + break; + case WebAssembly::END_LOOP: + ControlFlowStack.pop_back(); + printAnnotation( + OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':'); + break; + case WebAssembly::END_BLOCK: + printAnnotation( + OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':'); + break; + } + + // Annotate any control flow label references. + unsigned NumFixedOperands = Desc.NumOperands; + SmallSet<uint64_t, 8> Printed; + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + const MCOperandInfo &Info = Desc.OpInfo[i]; + if (!(i < NumFixedOperands + ? (Info.OperandType == WebAssembly::OPERAND_BASIC_BLOCK) + : (Desc.TSFlags & WebAssemblyII::VariableOpImmediateIsLabel))) + continue; + uint64_t Depth = MI->getOperand(i).getImm(); + if (!Printed.insert(Depth).second) + continue; + const auto &Pair = ControlFlowStack.rbegin()[Depth]; + printAnnotation(OS, utostr(Depth) + ": " + (Pair.second ? 
"up" : "down") + + " to label" + utostr(Pair.first)); + } + } } static std::string toString(const APFloat &FP) { @@ -82,6 +130,9 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { + assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || + MII.get(MI->getOpcode()).TSFlags == 0) && + "WebAssembly variable_ops register ops don't use TSFlags"); unsigned WAReg = Op.getReg(); if (int(WAReg) >= 0) printRegName(O, WAReg); @@ -95,19 +146,27 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (OpNo < MII.get(MI->getOpcode()).getNumDefs()) O << '='; } else if (Op.isImm()) { - switch (MI->getOpcode()) { - case WebAssembly::PARAM: - case WebAssembly::RESULT: - case WebAssembly::LOCAL: - O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm())); - break; - default: - O << Op.getImm(); - break; - } - } else if (Op.isFPImm()) + assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || + (MII.get(MI->getOpcode()).TSFlags & + WebAssemblyII::VariableOpIsImmediate)) && + "WebAssemblyII::VariableOpIsImmediate should be set for " + "variable_ops immediate ops"); + // TODO: (MII.get(MI->getOpcode()).TSFlags & + // WebAssemblyII::VariableOpImmediateIsLabel) + // can tell us whether this is an immediate referencing a label in the + // control flow stack, and it may be nice to pretty-print. + O << Op.getImm(); + } else if (Op.isFPImm()) { + assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || + MII.get(MI->getOpcode()).TSFlags == 0) && + "WebAssembly variable_ops floating point ops don't use TSFlags"); O << toString(APFloat(Op.getFPImm())); - else { + } else { + assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || + (MII.get(MI->getOpcode()).TSFlags & + WebAssemblyII::VariableOpIsImmediate)) && + "WebAssemblyII::VariableOpIsImmediate should be set for " + "variable_ops expr ops"); assert(Op.isExpr() && "unknown operand kind in printOperand"); Op.getExpr()->print(O, &MAI); } diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index 39a16f5..cd6c59a 100644 --- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -23,6 +23,9 @@ namespace llvm { class MCSubtargetInfo; class WebAssemblyInstPrinter final : public MCInstPrinter { + uint64_t ControlFlowCounter; + SmallVector<std::pair<uint64_t, bool>, 0> ControlFlowStack; + public: WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI); diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index b158ccb..bba06f6 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -95,9 +95,6 @@ WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { } } // end anonymous namespace -MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - const Triple &TT, - StringRef CPU) { +MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Triple &TT) { return new WebAssemblyAsmBackend(TT.isArch64Bit()); } diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp 
b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp index c47a3d9..2bb58b3 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp @@ -30,19 +30,31 @@ protected: }; } // end anonymous namespace -// FIXME: Use EM_NONE as a temporary hack. Should we decide to pursue ELF -// writing seriously, we should email generic-abi@googlegroups.com and ask -// for our own ELF code. WebAssemblyELFObjectWriter::WebAssemblyELFObjectWriter(bool Is64Bit, uint8_t OSABI) - : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_NONE, - /*HasRelocationAddend=*/true) {} + : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_WEBASSEMBLY, + /*HasRelocationAddend=*/false) {} unsigned WebAssemblyELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - // FIXME: Do we need our own relocs? - return Fixup.getKind(); + // WebAssembly functions are not allocated in the address space. To resolve a + // pointer to a function, we must use a special relocation type. + if (const MCSymbolRefExpr *SyExp = + dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) + if (SyExp->getKind() == MCSymbolRefExpr::VK_WebAssembly_FUNCTION) + return ELF::R_WEBASSEMBLY_FUNCTION; + + switch (Fixup.getKind()) { + case FK_Data_4: + assert(!is64Bit() && "4-byte relocations only supported on wasm32"); + return ELF::R_WEBASSEMBLY_DATA; + case FK_Data_8: + assert(is64Bit() && "8-byte relocations only supported on wasm64"); + return ELF::R_WEBASSEMBLY_DATA; + default: + llvm_unreachable("unimplemented fixup kind"); + } } MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp index d261779..02c717a 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -27,11 +27,12 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { // TODO: What should MaxInstLength be? - PrivateGlobalPrefix = ""; - PrivateLabelPrefix = ""; - UseDataRegionDirectives = true; + // Use .skip instead of .zero because .zero is confusing when used with two + // arguments (it doesn't actually zero things out). 
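+ // (".skip size, fill" spells out the fill byte explicitly, avoiding that + // surprise.)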
+ ZeroDirective = "\t.skip\t"; + Data8bitsDirective = "\t.int8\t"; Data16bitsDirective = "\t.int16\t"; Data32bitsDirective = "\t.int32\t"; diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index 7c6c79e..f409bd7 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCFixup.h" @@ -26,75 +27,66 @@ using namespace llvm; #define DEBUG_TYPE "mccodeemitter" +STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); +STATISTIC(MCNumFixups, "Number of MC fixups created."); + namespace { class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { - const MCRegisterInfo &MRI; - -public: - WebAssemblyMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri, - MCContext &) - : MRI(mri) {} + const MCInstrInfo &MCII; + const MCContext &Ctx; - ~WebAssemblyMCCodeEmitter() override {} - - /// TableGen'erated function for getting the binary encoding for an - /// instruction. + // Implementation generated by tablegen. uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - /// Return binary encoding of operand. If the machine operand requires - /// relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; + +public: + WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : MCII(mcii), Ctx(ctx) {} }; } // end anonymous namespace MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { - return new WebAssemblyMCCodeEmitter(MCII, MRI, Ctx); -} - -unsigned WebAssemblyMCCodeEmitter::getMachineOpValue( - const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - if (MO.isReg()) - return MRI.getEncodingValue(MO.getReg()); - if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); - - assert(MO.isExpr()); - - assert(MO.getExpr()->getKind() == MCExpr::SymbolRef); - - assert(false && "FIXME: not implemented yet"); - - return 0; + return new WebAssemblyMCCodeEmitter(MCII, Ctx); } void WebAssemblyMCCodeEmitter::encodeInstruction( const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - assert(false && "FIXME: not implemented yet"); -} - -// Encode WebAssembly Memory Operand -uint64_t -WebAssemblyMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - assert(false && "FIXME: not implemented yet"); - return 0; + // FIXME: This is not the real binary encoding. This is an extremely + // over-simplified encoding where we just use uint64_t for everything. This + // is a temporary measure. 
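+ // Interim layout: the opcode as a little-endian uint64_t; for variadic + // instructions, a uint64_t count of extra operands; then each operand as a + // little-endian uint64_t (or as a little-endian double for FP immediates).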
+ support::endian::Writer<support::little>(OS).write<uint64_t>(MI.getOpcode()); + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + if (Desc.isVariadic()) + support::endian::Writer<support::little>(OS).write<uint64_t>( + MI.getNumOperands() - Desc.NumOperands); + for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg()) { + support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getReg()); + } else if (MO.isImm()) { + support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getImm()); + } else if (MO.isFPImm()) { + support::endian::Writer<support::little>(OS).write<double>(MO.getFPImm()); + } else if (MO.isExpr()) { + support::endian::Writer<support::little>(OS).write<uint64_t>(0); + Fixups.push_back(MCFixup::create( + (1 + MCII.get(MI.getOpcode()).isVariadic() + i) * sizeof(uint64_t), + MO.getExpr(), STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4, + MI.getLoc())); + ++MCNumFixups; + } else { + llvm_unreachable("unexpected operand kind"); + } + } + + ++MCNumEmitted; // Keep track of the # of mi's emitted. } #include "WebAssemblyGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 14cd295..37000f1 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -15,10 +15,10 @@ #include "WebAssemblyMCTargetDesc.h" #include "InstPrinter/WebAssemblyInstPrinter.h" #include "WebAssemblyMCAsmInfo.h" +#include "WebAssemblyTargetStreamer.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -35,52 +35,89 @@ using namespace llvm; #define GET_REGINFO_MC_DESC #include "WebAssemblyGenRegisterInfo.inc" -static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo & /*MRI*/, - const Triple &TT) { +static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/, + const Triple &TT) { return new WebAssemblyMCAsmInfo(TT); } -static MCInstrInfo *createWebAssemblyMCInstrInfo() { +static MCInstrInfo *createMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitWebAssemblyMCInstrInfo(X); return X; } -static MCStreamer *createWebAssemblyMCStreamer(const Triple &T, MCContext &Ctx, - MCAsmBackend &MAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll) { - return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); +static MCRegisterInfo *createMCRegisterInfo(const Triple & /*T*/) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitWebAssemblyMCRegisterInfo(X, 0); + return X; } -static MCInstPrinter * -createWebAssemblyMCInstPrinter(const Triple & /*T*/, unsigned SyntaxVariant, - const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI) { +static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { assert(SyntaxVariant == 0); return new WebAssemblyInstPrinter(MAI, MII, MRI); } +static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo & /*MRI*/, + MCContext &Ctx) { + return createWebAssemblyMCCodeEmitter(MCII, Ctx); +} + +static MCAsmBackend *createAsmBackend(const Target & /*T*/, + const 
MCRegisterInfo & /*MRI*/, + const Triple &TT, StringRef /*CPU*/) { + return createWebAssemblyAsmBackend(TT); +} + +static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU, + StringRef FS) { + return createWebAssemblyMCSubtargetInfoImpl(TT, CPU, FS); +} + +static MCTargetStreamer * +createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo & /*STI*/) { + return new WebAssemblyTargetELFStreamer(S); +} + +static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter * /*InstPrint*/, + bool /*isVerboseAsm*/) { + return new WebAssemblyTargetAsmStreamer(S, OS); +} + // Force static initialization. extern "C" void LLVMInitializeWebAssemblyTargetMC() { for (Target *T : {&TheWebAssemblyTarget32, &TheWebAssemblyTarget64}) { // Register the MC asm info. - RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo); + RegisterMCAsmInfoFn X(*T, createMCAsmInfo); // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(*T, createWebAssemblyMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(*T, createMCInstrInfo); - // Register the object streamer - TargetRegistry::RegisterELFStreamer(*T, createWebAssemblyMCStreamer); + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createMCRegisterInfo); // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(*T, createMCInstPrinter); + + // Register the MC code emitter. + TargetRegistry::RegisterMCCodeEmitter(*T, createCodeEmitter); + + // Register the ASM Backend. + TargetRegistry::RegisterMCAsmBackend(*T, createAsmBackend); - // Register the MC code emitter - TargetRegistry::RegisterMCCodeEmitter(*T, createWebAssemblyMCCodeEmitter); + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createMCSubtargetInfo); - // Register the ASM Backend - TargetRegistry::RegisterMCAsmBackend(*T, createWebAssemblyAsmBackend); + // Register the object target streamer. + TargetRegistry::RegisterObjectTargetStreamer(*T, + createObjectTargetStreamer); + // Register the asm target streamer. 
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer); } } diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index e78f73e..9bac4f8 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -15,40 +15,62 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" namespace llvm { -class formatted_raw_ostream; class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; -class MCRegisterInfo; class MCObjectWriter; -class MCStreamer; class MCSubtargetInfo; -class MCTargetStreamer; -class StringRef; class Target; class Triple; -class raw_ostream; class raw_pwrite_stream; extern Target TheWebAssemblyTarget32; extern Target TheWebAssemblyTarget64; MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU); +MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT); MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint8_t OSABI); +namespace WebAssembly { +enum OperandType { + /// Basic block label in a branch construct. + OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET, + /// Floating-point immediate. + OPERAND_FPIMM +}; + +/// WebAssembly-specific directive identifiers. +enum Directive { + // FIXME: This is not the real binary encoding. + DotParam = UINT64_MAX - 0, ///< .param + DotResult = UINT64_MAX - 1, ///< .result + DotLocal = UINT64_MAX - 2, ///< .local + DotEndFunc = UINT64_MAX - 3, ///< .endfunc +}; + +} // end namespace WebAssembly + +namespace WebAssemblyII { +enum { + // For variadic instructions, this flag indicates whether an operand + // in the variable_ops range is an immediate value. + VariableOpIsImmediate = (1 << 0), + // For immediate values in the variable_ops range, this flag indicates + // whether the value represents a control-flow label. + VariableOpImmediateIsLabel = (1 << 1), +}; +} // end namespace WebAssemblyII + } // end namespace llvm // Defines symbolic names for WebAssembly registers. This defines a mapping from diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp new file mode 100644 index 0000000..1d28228 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -0,0 +1,94 @@ +//==-- WebAssemblyTargetStreamer.cpp - WebAssembly Target Streamer Methods --=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines WebAssembly-specific target streamer classes. +/// These are for implementing support for target-specific assembly directives. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyTargetStreamer.h" +#include "InstPrinter/WebAssemblyInstPrinter.h" +#include "WebAssemblyMCTargetDesc.h" +#include "WebAssemblyTargetObjectFile.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +WebAssemblyTargetStreamer::WebAssemblyTargetStreamer(MCStreamer &S) + : MCTargetStreamer(S) {} + +WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer( + MCStreamer &S, formatted_raw_ostream &OS) + : WebAssemblyTargetStreamer(S), OS(OS) {} + +WebAssemblyTargetELFStreamer::WebAssemblyTargetELFStreamer(MCStreamer &S) + : WebAssemblyTargetStreamer(S) {} + +static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) { + bool First = true; + for (MVT Type : Types) { + if (First) + First = false; + else + OS << ", "; + OS << WebAssembly::TypeToString(Type); + } + OS << '\n'; +} + +void WebAssemblyTargetAsmStreamer::emitParam(ArrayRef<MVT> Types) { + OS << "\t.param \t"; + PrintTypes(OS, Types); +} + +void WebAssemblyTargetAsmStreamer::emitResult(ArrayRef<MVT> Types) { + OS << "\t.result \t"; + PrintTypes(OS, Types); +} + +void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) { + OS << "\t.local \t"; + PrintTypes(OS, Types); +} + +void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } + +// FIXME: What follows is not the real binary encoding. + +static void EncodeTypes(MCStreamer &Streamer, ArrayRef<MVT> Types) { + Streamer.EmitIntValue(Types.size(), sizeof(uint64_t)); + for (MVT Type : Types) + Streamer.EmitIntValue(Type.SimpleTy, sizeof(uint64_t)); +} + +void WebAssemblyTargetELFStreamer::emitParam(ArrayRef<MVT> Types) { + Streamer.EmitIntValue(WebAssembly::DotParam, sizeof(uint64_t)); + EncodeTypes(Streamer, Types); +} + +void WebAssemblyTargetELFStreamer::emitResult(ArrayRef<MVT> Types) { + Streamer.EmitIntValue(WebAssembly::DotResult, sizeof(uint64_t)); + EncodeTypes(Streamer, Types); +} + +void WebAssemblyTargetELFStreamer::emitLocal(ArrayRef<MVT> Types) { + Streamer.EmitIntValue(WebAssembly::DotLocal, sizeof(uint64_t)); + EncodeTypes(Streamer, Types); +} + +void WebAssemblyTargetELFStreamer::emitEndFunc() { + Streamer.EmitIntValue(WebAssembly::DotEndFunc, sizeof(uint64_t)); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h new file mode 100644 index 0000000..c66a515 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -0,0 +1,68 @@ +//==-- WebAssemblyTargetStreamer.h - WebAssembly Target Streamer -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares WebAssembly-specific target streamer classes. +/// These are for implementing support for target-specific assembly directives. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H + +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/MC/MCStreamer.h" + +namespace llvm { + +class MCELFStreamer; + +/// WebAssembly-specific streamer interface, to implement support for +/// WebAssembly-specific assembly directives. +class WebAssemblyTargetStreamer : public MCTargetStreamer { +public: + explicit WebAssemblyTargetStreamer(MCStreamer &S); + + /// .param + virtual void emitParam(ArrayRef<MVT> Types) = 0; + /// .result + virtual void emitResult(ArrayRef<MVT> Types) = 0; + /// .local + virtual void emitLocal(ArrayRef<MVT> Types) = 0; + /// .endfunc + virtual void emitEndFunc() = 0; +}; + +/// This part is for ascii assembly output +class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer { + formatted_raw_ostream &OS; + +public: + WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); + + void emitParam(ArrayRef<MVT> Types) override; + void emitResult(ArrayRef<MVT> Types) override; + void emitLocal(ArrayRef<MVT> Types) override; + void emitEndFunc() override; +}; + +/// This part is for ELF object output +class WebAssemblyTargetELFStreamer final : public WebAssemblyTargetStreamer { +public: + explicit WebAssemblyTargetELFStreamer(MCStreamer &S); + + void emitParam(ArrayRef<MVT> Types) override; + void emitResult(ArrayRef<MVT> Types) override; + void emitLocal(ArrayRef<MVT> Types) override; + void emitEndFunc() override; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 0d2b4d9..45ac99d 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -17,6 +17,7 @@ #include "WebAssembly.h" #include "InstPrinter/WebAssemblyInstPrinter.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "MCTargetDesc/WebAssemblyTargetStreamer.h" #include "WebAssemblyMCInstLower.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblyRegisterInfo.h" @@ -69,7 +70,9 @@ private: void EmitJumpTableInfo() override; void EmitConstantPool() override; void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; void EmitInstruction(const MachineInstr *MI) override; + const MCExpr *lowerConstant(const Constant *CV) override; bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) override; @@ -80,6 +83,7 @@ private: MVT getRegType(unsigned RegNo) const; const char *toString(MVT VT) const; std::string regToString(const MachineOperand &MO); + WebAssemblyTargetStreamer *getTargetStreamer(); }; } // end anonymous namespace @@ -90,9 +94,9 @@ private: MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { const TargetRegisterClass *TRC = - TargetRegisterInfo::isVirtualRegister(RegNo) ? - MRI->getRegClass(RegNo) : - MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo); + TargetRegisterInfo::isVirtualRegister(RegNo) + ? 
MRI->getRegClass(RegNo) + : MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo); for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) if (TRC->hasType(T)) return T; @@ -101,6 +105,10 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { return MVT::Other; } +const char *WebAssemblyAsmPrinter::toString(MVT VT) const { + return WebAssembly::TypeToString(VT); +} + std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { unsigned RegNo = MO.getReg(); assert(TargetRegisterInfo::isVirtualRegister(RegNo) && @@ -111,8 +119,10 @@ std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { return '$' + utostr(WAReg); } -const char *WebAssemblyAsmPrinter::toString(MVT VT) const { - return WebAssembly::TypeToString(VT); +WebAssemblyTargetStreamer * +WebAssemblyAsmPrinter::getTargetStreamer() { + MCTargetStreamer *TS = OutStreamer->getTargetStreamer(); + return static_cast<WebAssemblyTargetStreamer *>(TS); } //===----------------------------------------------------------------------===// @@ -145,29 +155,20 @@ static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, } void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { - if (!MFI->getParams().empty()) { - MCInst Param; - Param.setOpcode(WebAssembly::PARAM); - for (MVT VT : MFI->getParams()) - Param.addOperand(MCOperand::createImm(VT.SimpleTy)); - EmitToStreamer(*OutStreamer, Param); - } + if (!MFI->getParams().empty()) + getTargetStreamer()->emitParam(MFI->getParams()); SmallVector<MVT, 4> ResultVTs; const Function &F(*MF->getFunction()); ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs); + // If the return type needs to be legalized it will get converted into // passing a pointer. - if (ResultVTs.size() == 1) { - MCInst Result; - Result.setOpcode(WebAssembly::RESULT); - Result.addOperand(MCOperand::createImm(ResultVTs.front().SimpleTy)); - EmitToStreamer(*OutStreamer, Result); - } + if (ResultVTs.size() == 1) + getTargetStreamer()->emitResult(ResultVTs); bool AnyWARegs = false; - MCInst Local; - Local.setOpcode(WebAssembly::LOCAL); + SmallVector<MVT, 16> LocalTypes; for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx); unsigned WAReg = MFI->getWAReg(VReg); @@ -180,22 +181,26 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { // Don't declare stackified registers. if (int(WAReg) < 0) continue; - Local.addOperand(MCOperand::createImm(getRegType(VReg).SimpleTy)); + LocalTypes.push_back(getRegType(VReg)); AnyWARegs = true; } auto &PhysRegs = MFI->getPhysRegs(); for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) { if (PhysRegs[PReg] == -1U) continue; - Local.addOperand(MCOperand::createImm(getRegType(PReg).SimpleTy)); + LocalTypes.push_back(getRegType(PReg)); AnyWARegs = true; } if (AnyWARegs) - EmitToStreamer(*OutStreamer, Local); + getTargetStreamer()->emitLocal(LocalTypes); AsmPrinter::EmitFunctionBodyStart(); } +void WebAssemblyAsmPrinter::EmitFunctionBodyEnd() { + getTargetStreamer()->emitEndFunc(); +} + void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n'); @@ -207,10 +212,6 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { // These represent values which are live into the function entry, so there's // no instruction to emit. 
break; - case WebAssembly::LOOP_END: - // This is a no-op which just exists to tell AsmPrinter.cpp that there's a - // fallthrough which nevertheless requires a label for the destination here. - break; default: { WebAssemblyMCInstLower MCInstLowering(OutContext, *this); MCInst TmpInst; @@ -221,6 +222,14 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { } } +const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) + if (GV->getValueType()->isFunctionTy()) + return MCSymbolRefExpr::create( + getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); + return AsmPrinter::lowerConstant(CV); +} + bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index e9671ee..a39349c 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -256,7 +257,8 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { /// code) for a branch instruction to both branch to a block and fallthrough /// to it, so we check the actual branch operands to see if there are any /// explicit mentions. -static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, MachineBasicBlock *MBB) { +static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, + MachineBasicBlock *MBB) { for (MachineInstr &MI : Pred->terminators()) for (MachineOperand &MO : MI.explicit_operands()) if (MO.isMBB() && MO.getMBB() == MBB) @@ -325,13 +327,21 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, InsertPos = Header->getFirstTerminator(); while (InsertPos != Header->begin() && prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) && - prev(InsertPos)->getOpcode() != WebAssembly::LOOP) + prev(InsertPos)->getOpcode() != WebAssembly::LOOP && + prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK && + prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP) --InsertPos; } // Add the BLOCK. - BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)) - .addMBB(&MBB); + BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)); + + // Mark the end of the block. + InsertPos = MBB.begin(); + while (InsertPos != MBB.end() && + InsertPos->getOpcode() == WebAssembly::END_LOOP) + ++InsertPos; + BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK)); // Track the farthest-spanning scope that ends at this point. int Number = MBB.getNumber(); @@ -341,10 +351,11 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, } /// Insert a LOOP marker for a loop starting at MBB (if it's a loop header). 
-static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
-                            SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
-                            const WebAssemblyInstrInfo &TII,
-                            const MachineLoopInfo &MLI) {
+static void PlaceLoopMarker(
+    MachineBasicBlock &MBB, MachineFunction &MF,
+    SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
+    DenseMap<const MachineInstr *, const MachineBasicBlock *> &LoopTops,
+    const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) {
   MachineLoop *Loop = MLI.getLoopFor(&MBB);
   if (!Loop || Loop->getHeader() != &MBB)
     return;
@@ -361,14 +372,19 @@ static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
     Iter = next(MachineFunction::iterator(Bottom));
   }
   MachineBasicBlock *AfterLoop = &*Iter;
-  BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP))
-      .addMBB(AfterLoop);
 
-  // Emit a special no-op telling the asm printer that we need a label to close
-  // the loop scope, even though the destination is only reachable by
-  // fallthrough.
-  if (!Bottom->back().isBarrier())
-    BuildMI(*Bottom, Bottom->end(), DebugLoc(), TII.get(WebAssembly::LOOP_END));
+  // Mark the beginning of the loop (after the end of any existing loop that
+  // ends here).
+  auto InsertPos = MBB.begin();
+  while (InsertPos != MBB.end() &&
+         InsertPos->getOpcode() == WebAssembly::END_LOOP)
+    ++InsertPos;
+  BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::LOOP));
+
+  // Mark the end of the loop.
+  MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), DebugLoc(),
+                              TII.get(WebAssembly::END_LOOP));
+  LoopTops[End] = &MBB;
 
   assert((!ScopeTops[AfterLoop->getNumber()] ||
           ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
@@ -377,6 +393,19 @@ static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
   ScopeTops[AfterLoop->getNumber()] = &MBB;
 }
 
+static unsigned
+GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
+         const MachineBasicBlock *MBB) {
+  unsigned Depth = 0;
+  for (auto X : reverse(Stack)) {
+    if (X == MBB)
+      break;
+    ++Depth;
+  }
+  assert(Depth < Stack.size() && "Branch destination should be in scope");
+  return Depth;
+}
+
 /// Insert LOOP and BLOCK markers at appropriate places.
 static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
                          const WebAssemblyInstrInfo &TII,
@@ -388,25 +417,57 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
   // we may insert at the end.
   SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
 
+  // For each END_LOOP, the corresponding LOOP.
+  DenseMap<const MachineInstr *, const MachineBasicBlock *> LoopTops;
+
   for (auto &MBB : MF) {
     // Place the LOOP for MBB if MBB is the header of a loop.
-    PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI);
+    PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI);
 
     // Place the BLOCK for MBB if MBB is branched to from above.
     PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT);
   }
-}
 
-#ifndef NDEBUG
-static bool
-IsOnStack(const SmallVectorImpl<std::pair<MachineBasicBlock *, bool>> &Stack,
-          const MachineBasicBlock *MBB) {
-  for (const auto &Pair : Stack)
-    if (Pair.first == MBB)
-      return true;
-  return false;
+  // Now rewrite references to basic blocks to be depth immediates.
+ SmallVector<const MachineBasicBlock *, 8> Stack; + for (auto &MBB : reverse(MF)) { + for (auto &MI : reverse(MBB)) { + switch (MI.getOpcode()) { + case WebAssembly::BLOCK: + assert(ScopeTops[Stack.back()->getNumber()] == &MBB && + "Block should be balanced"); + Stack.pop_back(); + break; + case WebAssembly::LOOP: + assert(Stack.back() == &MBB && "Loop top should be balanced"); + Stack.pop_back(); + Stack.pop_back(); + break; + case WebAssembly::END_BLOCK: + Stack.push_back(&MBB); + break; + case WebAssembly::END_LOOP: + Stack.push_back(&MBB); + Stack.push_back(LoopTops[&MI]); + break; + default: + if (MI.isTerminator()) { + // Rewrite MBB operands to be depth immediates. + SmallVector<MachineOperand, 4> Ops(MI.operands()); + while (MI.getNumOperands() > 0) + MI.RemoveOperand(MI.getNumOperands() - 1); + for (auto MO : Ops) { + if (MO.isMBB()) + MO = MachineOperand::CreateImm(GetDepth(Stack, MO.getMBB())); + MI.addOperand(MF, MO); + } + } + break; + } + } + } + assert(Stack.empty() && "Control flow should be balanced"); } -#endif bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** CFG Stackifying **********\n" @@ -415,7 +476,9 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { const auto &MLI = getAnalysis<MachineLoopInfo>(); auto &MDT = getAnalysis<MachineDominatorTree>(); + // Liveness is not tracked for EXPR_STACK physreg. const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + MF.getRegInfo().invalidateLiveness(); // RPO sorting needs all loops to be single-entry. EliminateMultipleEntryLoops(MF, MLI); @@ -426,43 +489,5 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. PlaceMarkers(MF, MLI, TII, MDT); -#ifndef NDEBUG - // Verify that block and loop beginnings and endings are in LIFO order, and - // that all references to blocks are to blocks on the stack at the point of - // the reference. - SmallVector<std::pair<MachineBasicBlock *, bool>, 0> Stack; - for (auto &MBB : MF) { - while (!Stack.empty() && Stack.back().first == &MBB) - if (Stack.back().second) { - assert(Stack.size() >= 2); - Stack.pop_back(); - Stack.pop_back(); - } else { - assert(Stack.size() >= 1); - Stack.pop_back(); - } - for (auto &MI : MBB) - switch (MI.getOpcode()) { - case WebAssembly::LOOP: - Stack.push_back(std::make_pair(&MBB, false)); - Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true)); - break; - case WebAssembly::BLOCK: - Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false)); - break; - default: - // Verify that all referenced blocks are in scope. A reference to a - // block with a negative number is invalid, but can happen with inline - // asm, so we shouldn't assert on it, but instead let CodeGen properly - // fail on it. 
- for (const MachineOperand &MO : MI.explicit_operands()) - if (MO.isMBB() && MO.getMBB()->getNumber() >= 0) - assert(IsOnStack(Stack, MO.getMBB())); - break; - } - } - assert(Stack.empty()); -#endif - return true; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 7a89f78..e9933b0 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -573,7 +573,8 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, SDLoc DL(Op); const auto *GA = cast<GlobalAddressSDNode>(Op); EVT VT = Op.getValueType(); - assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); + assert(GA->getTargetFlags() == 0 && + "Unexpected target flags on generic GlobalAddressSDNode"); if (GA->getAddressSpace() != 0) fail(DL, DAG, "WebAssembly only expects the 0 address space"); return DAG.getNode( @@ -587,9 +588,16 @@ WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, SDLoc DL(Op); const auto *ES = cast<ExternalSymbolSDNode>(Op); EVT VT = Op.getValueType(); - assert(ES->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); + assert(ES->getTargetFlags() == 0 && + "Unexpected target flags on generic ExternalSymbolSDNode"); + // Set the TargetFlags to 0x1 which indicates that this is a "function" + // symbol rather than a data symbol. We do this unconditionally even though + // we don't know anything about the symbol other than its name, because all + // external symbols used in target-independent SelectionDAG code are for + // functions. return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, - DAG.getTargetExternalSymbol(ES->getSymbol(), VT)); + DAG.getTargetExternalSymbol(ES->getSymbol(), VT, + /*TargetFlags=*/0x1)); } SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 05efe89..fda9595 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -41,28 +41,33 @@ let Defs = [ARGUMENTS] in { // TODO: SelectionDAG's lowering insists on using a pointer as the index for // jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode // currently. +// Set TSFlags{0} to 1 to indicate that the variable_ops are immediates. +// Set TSFlags{1} to 1 to indicate that the immediates represent labels. let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops), [(WebAssemblytableswitch I32:$index, bb:$default)], - "tableswitch\t$index, $default">; + "tableswitch\t$index, $default"> { + let TSFlags{0} = 1; + let TSFlags{1} = 1; +} def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops), [(WebAssemblytableswitch I64:$index, bb:$default)], - "tableswitch\t$index, $default">; + "tableswitch\t$index, $default"> { + let TSFlags{0} = 1; + let TSFlags{1} = 1; +} } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 -// Placemarkers to indicate the start of a block or loop scope. These +// Placemarkers to indicate the start or end of a block or loop scope. These // use/clobber EXPR_STACK to prevent them from being moved into the middle of // an expression tree. 
let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in { -def BLOCK : I<(outs), (ins bb_op:$dst), [], "block \t$dst">; -def LOOP : I<(outs), (ins bb_op:$dst), [], "loop \t$dst">; +def BLOCK : I<(outs), (ins), [], "block">; +def LOOP : I<(outs), (ins), [], "loop">; +def END_BLOCK : I<(outs), (ins), [], "end_block">; +def END_LOOP : I<(outs), (ins), [], "end_loop">; } // Uses = [EXPR_STACK], Defs = [EXPR_STACK] -// No-op to indicate to the AsmPrinter that a loop ends here, so a -// basic block label is needed even if it wouldn't otherwise appear so. -let isTerminator = 1, hasCtrlDep = 1 in -def LOOP_END : I<(outs), (ins), []>; - multiclass RETURN<WebAssemblyRegClass vt> { def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)], "return \t$val">; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 5e7663c..028e9af 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -74,6 +74,9 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, case WebAssembly::BR_IF: if (HaveCond) return true; + // If we're running after CFGStackify, we can't optimize further. + if (!MI.getOperand(1).isMBB()) + return true; Cond.push_back(MachineOperand::CreateImm(true)); Cond.push_back(MI.getOperand(0)); TBB = MI.getOperand(1).getMBB(); @@ -82,12 +85,18 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, case WebAssembly::BR_UNLESS: if (HaveCond) return true; + // If we're running after CFGStackify, we can't optimize further. + if (!MI.getOperand(1).isMBB()) + return true; Cond.push_back(MachineOperand::CreateImm(false)); Cond.push_back(MI.getOperand(0)); TBB = MI.getOperand(1).getMBB(); HaveCond = true; break; case WebAssembly::BR: + // If we're running after CFGStackify, we can't optimize further. + if (!MI.getOperand(0).isMBB()) + return true; if (!HaveCond) TBB = MI.getOperand(0).getMBB(); else diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index f0b4ce7..2e682a4 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -66,8 +66,18 @@ def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", // WebAssembly-specific Operands. //===----------------------------------------------------------------------===// +let OperandNamespace = "WebAssembly" in { + +let OperandType = "OPERAND_BASIC_BLOCK" in def bb_op : Operand<OtherVT>; +let OperandType = "OPERAND_FPIMM" in { +def f32imm_op : Operand<f32>; +def f64imm_op : Operand<f64>; +} // OperandType = "OPERAND_FPIMM" + +} // OperandNamespace = "WebAssembly" + //===----------------------------------------------------------------------===// // WebAssembly Instruction Format Definitions. 
//===----------------------------------------------------------------------===// @@ -120,31 +130,20 @@ def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm), def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm), [(set I64:$res, imm:$imm)], "i64.const\t$res, $imm">; -def CONST_F32 : I<(outs F32:$res), (ins f32imm:$imm), +def CONST_F32 : I<(outs F32:$res), (ins f32imm_op:$imm), [(set F32:$res, fpimm:$imm)], "f32.const\t$res, $imm">; -def CONST_F64 : I<(outs F64:$res), (ins f64imm:$imm), +def CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm), [(set F64:$res, fpimm:$imm)], "f64.const\t$res, $imm">; } // isMoveImm = 1 } // Defs = [ARGUMENTS] -def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)), - (CONST_I32 tglobaladdr:$dst)>; -def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)), - (CONST_I32 texternalsym:$dst)>; -def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)), - (CONST_I32 tjumptable:$dst)>; - -let Defs = [ARGUMENTS] in { - -// Function signature and local variable declaration "instructions". -def PARAM : I<(outs), (ins variable_ops), [], ".param \t">; -def RESULT : I<(outs), (ins variable_ops), [], ".result \t">; -def LOCAL : I<(outs), (ins variable_ops), [], ".local \t">; - -} // Defs = [ARGUMENTS] +def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)), + (CONST_I32 tglobaladdr:$addr)>; +def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)), + (CONST_I32 texternalsym:$addr)>; //===----------------------------------------------------------------------===// // Additional sets of instructions. diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 74ec45d..b39ac52 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -24,10 +24,25 @@ // WebAssembly constant offsets are performed as unsigned with infinite // precision, so we need to check for NoUnsignedWrap so that we don't fold an // offset for an add that needs wrapping. -def regPlusImm : PatFrag<(ops node:$off, node:$addr), +def regPlusImm : PatFrag<(ops node:$addr, node:$off), (add node:$addr, node:$off), [{ return N->getFlags()->hasNoUnsignedWrap(); }]>; +// GlobalAddresses are conceptually unsigned values, so we can also fold them +// into immediate values as long as their offsets are non-negative. +def regPlusGA : PatFrag<(ops node:$addr, node:$off), + (add node:$addr, node:$off), + [{ + return N->getFlags()->hasNoUnsignedWrap() || + (N->getOperand(1)->getOpcode() == WebAssemblyISD::Wrapper && + isa<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0)) && + cast<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0)) + ->getOffset() >= 0); +}]>; + +// We don't need a regPlusES because external symbols never have constant +// offsets folded into them, so we can just use add. + let Defs = [ARGUMENTS] in { // Basic load. @@ -49,29 +64,33 @@ def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; // Select loads with a constant offset. 
-def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))), (LOAD_I32 imm:$off, $addr)>; -def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))), (LOAD_I64 imm:$off, $addr)>; -def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))), +def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))), (LOAD_F32 imm:$off, $addr)>; -def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))), +def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))), (LOAD_F64 imm:$off, $addr)>; -def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (load (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (load (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD_I64 tglobaladdr:$off, $addr)>; -def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(f32 (load (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD_F32 tglobaladdr:$off, $addr)>; -def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(f64 (load (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD_F64 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), (LOAD_I32 texternalsym:$off, $addr)>; -def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), (LOAD_I64 texternalsym:$off, $addr)>; -def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), (LOAD_F32 texternalsym:$off, $addr)>; -def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), (LOAD_F64 texternalsym:$off, $addr)>; // Select loads with just a constant offset. @@ -135,65 +154,85 @@ def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; // Select extending loads with a constant offset. 
-def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_S_I32 imm:$off, $addr)>; -def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_U_I32 imm:$off, $addr)>; -def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_S_I32 imm:$off, $addr)>; -def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_U_I32 imm:$off, $addr)>; -def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_S_I64 imm:$off, $addr)>; -def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_U_I64 imm:$off, $addr)>; -def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_S_I64 imm:$off, $addr)>; -def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_U_I64 imm:$off, $addr)>; -def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))), (LOAD32_S_I64 imm:$off, $addr)>; -def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))), (LOAD32_U_I64 imm:$off, $addr)>; -def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_S_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_U_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_S_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_U_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_S_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_S_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD32_S_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, 
I32:$addr))), +def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD32_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (sextloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_S_I32 texternalsym:$off, $addr)>; -def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (zextloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_U_I32 texternalsym:$off, $addr)>; -def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (sextloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_S_I32 texternalsym:$off, $addr)>; -def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (zextloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_U_I32 texternalsym:$off, $addr)>; -def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (sextloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_S_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (zextloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_U_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (sextloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_S_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (zextloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_U_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (sextloadi32 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD32_S_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (zextloadi32 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD32_U_I64 texternalsym:$off, $addr)>; // Select extending loads with just a constant offset. @@ -259,35 +298,45 @@ def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; // Select "don't care" extending loads with a constant offset. 
-def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_U_I32 imm:$off, $addr)>; -def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_U_I32 imm:$off, $addr)>; -def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))), (LOAD8_U_I64 imm:$off, $addr)>; -def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))), (LOAD16_U_I64 imm:$off, $addr)>; -def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))), +def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))), (LOAD32_U_I64 imm:$off, $addr)>; -def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_U_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_U_I32 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD8_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD16_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), +def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off)))), (LOAD32_U_I64 tglobaladdr:$off, $addr)>; -def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (extloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_U_I32 texternalsym:$off, $addr)>; -def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i32 (extloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_U_I32 texternalsym:$off, $addr)>; -def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (extloadi8 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD8_U_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (extloadi16 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD16_U_I64 texternalsym:$off, $addr)>; -def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))), +def : Pat<(i64 (extloadi32 (add I32:$addr, + (WebAssemblywrapper texternalsym:$off)))), (LOAD32_U_I64 texternalsym:$off, $addr)>; // Select "don't care" extending loads with just a constant offset. @@ -343,29 +392,37 @@ def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>; def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>; // Select stores with a constant offset. 
-def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(store I32:$val, (regPlusImm I32:$addr, imm:$off)), (STORE_I32 imm:$off, I32:$addr, I32:$val)>; -def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(store I64:$val, (regPlusImm I32:$addr, imm:$off)), (STORE_I64 imm:$off, I32:$addr, I64:$val)>; -def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)), (STORE_F32 imm:$off, I32:$addr, F32:$val)>; -def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)), (STORE_F64 imm:$off, I32:$addr, F64:$val)>; -def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(store I32:$val, (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; -def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(store I64:$val, (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; -def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(store F32:$val, (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>; -def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(store F64:$val, (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>; -def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(store I32:$val, (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>; -def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(store I64:$val, (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>; -def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(store F32:$val, (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>; -def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(store F64:$val, (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>; // Select stores with just a constant offset. @@ -423,35 +480,54 @@ def : Pat<(truncstorei32 I64:$val, I32:$addr), (STORE32_I64 0, I32:$addr, I64:$val)>; // Select truncating stores with a constant offset. 
-def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(truncstorei8 I32:$val, (regPlusImm I32:$addr, imm:$off)), (STORE8_I32 imm:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(truncstorei16 I32:$val, (regPlusImm I32:$addr, imm:$off)), (STORE16_I32 imm:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(truncstorei8 I64:$val, (regPlusImm I32:$addr, imm:$off)), (STORE8_I64 imm:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)), (STORE16_I64 imm:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)), +def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)), (STORE32_I64 imm:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(truncstorei8 I32:$val, + (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(truncstorei16 I32:$val, + (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(truncstorei8 I64:$val, + (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(truncstorei16 I64:$val, + (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), +def : Pat<(truncstorei32 I64:$val, + (regPlusGA I32:$addr, + (WebAssemblywrapper tglobaladdr:$off))), (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(truncstorei8 I32:$val, (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(truncstorei16 I32:$val, + (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>; -def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(truncstorei8 I64:$val, + (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(truncstorei16 I64:$val, + (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>; -def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), +def : Pat<(truncstorei32 I64:$val, + (add I32:$addr, + (WebAssemblywrapper texternalsym:$off))), (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>; // Select truncating stores with just a constant offset. 
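The regPlusImm and regPlusGA PatFrags used throughout the patterns above hinge on WebAssembly's addressing rule: the constant offset field and the address operand are added as unsigned values with infinite precision, so only adds known not to wrap (or global-address offsets known to be non-negative) may be folded. A minimal sketch of why, with invented values rather than anything from this patch:

    #include <cassert>
    #include <cstdint>

    // WebAssembly semantics: effective address = zext(addr) + zext(offset),
    // computed without 32-bit wraparound.
    uint64_t effectiveAddress(uint32_t Addr, uint32_t Offset) {
      return uint64_t(Addr) + uint64_t(Offset);
    }

    int main() {
      uint32_t Addr = 0xFFFFFFF0u;
      uint32_t Imm = 0x20u;
      // The program's i32.add wraps to 0x10, but a folded offset field does
      // not wrap, so folding an add that lacks the NoUnsignedWrap flag would
      // change which address is accessed.
      uint32_t Wrapped = Addr + Imm;                  // 0x00000010
      uint64_t Folded = effectiveAddress(Addr, Imm);  // 0x100000010
      assert(uint64_t(Wrapped) != Folded);
      return 0;
    }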
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index a953f82..022a448 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -36,15 +36,17 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); } -MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO, - MCSymbol *Sym) const { - assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags"); +MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym, + int64_t Offset, + bool IsFunc) const { + MCSymbolRefExpr::VariantKind VK = + IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION + : MCSymbolRefExpr::VK_None; + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx); - const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); - - int64_t Offset = MO.getOffset(); if (Offset != 0) { - assert(!MO.isJTI() && "Unexpected offset with jump table index"); + if (IsFunc) + report_fatal_error("Function addresses with offsets not supported"); Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx); } @@ -64,6 +66,9 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI, default: MI->dump(); llvm_unreachable("unknown operand type"); + case MachineOperand::MO_MachineBasicBlock: + MI->dump(); + llvm_unreachable("MachineBasicBlock operand should have been rewritten"); case MachineOperand::MO_Register: { // Ignore all implicit register operands. if (MO.isImplicit()) @@ -89,15 +94,19 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI, llvm_unreachable("unknown floating point immediate type"); break; } - case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr( - MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); - break; case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + assert(MO.getTargetFlags() == 0 && + "WebAssembly does not use target flags on GlobalAddresses"); + MCOp = LowerSymbolOperand(GetGlobalAddressSymbol(MO), MO.getOffset(), + MO.getGlobal()->getValueType()->isFunctionTy()); break; case MachineOperand::MO_ExternalSymbol: - MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + // The target flag indicates whether this is a symbol for a + // variable or a function. 
+ assert((MO.getTargetFlags() & -2) == 0 && + "WebAssembly uses only one target flag bit on ExternalSymbols"); + MCOp = LowerSymbolOperand(GetExternalSymbolSymbol(MO), /*Offset=*/0, + MO.getTargetFlags() & 1); break; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h index 6d70470..ab4ba1c 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h @@ -31,9 +31,10 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower { MCContext &Ctx; AsmPrinter &Printer; - MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; + MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset, + bool IsFunc) const; public: WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer) diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 89ef5cd..537c147 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -147,8 +147,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // block boundaries, and the blocks aren't ordered so the block visitation // order isn't significant, but we may want to change this in the future. for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : reverse(MBB)) { - MachineInstr *Insert = &MI; + // Don't use a range-based for loop, because we modify the list as we're + // iterating over it and the end iterator may change. + for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) { + MachineInstr *Insert = &*MII; // Don't nest anything inside a phi. if (Insert->getOpcode() == TargetOpcode::PHI) break; @@ -221,7 +223,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { Insert = Def; } if (AnyStackified) - ImposeStackOrdering(&MI); + ImposeStackOrdering(&*MII); } } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index dcada45..90d8dda 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -61,17 +61,23 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( MachineFunction &MF = *MBB.getParent(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); const MachineFrameInfo& MFI = *MF.getFrameInfo(); - int FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); + int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); if (MI.mayLoadOrStore()) { // If this is a load or store, make it relative to SP and fold the frame - // offset directly in - assert(MI.getOperand(1).getImm() == 0 && - "Can't eliminate FI yet if offset is already set"); - MI.getOperand(1).setImm(FrameOffset); + // offset directly in. + assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0); + int64_t Offset = MI.getOperand(1).getImm() + FrameOffset; + + if (static_cast<uint64_t>(Offset) > std::numeric_limits<uint32_t>::max()) { + // If this happens the program is invalid, but better to error here than + // generate broken code. 
+ report_fatal_error("Memory offset field overflow"); + } + MI.getOperand(1).setImm(Offset); MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); } else { - // Otherwise create an i32.add SP, offset and make it the operand + // Otherwise create an i32.add SP, offset and make it the operand. auto &MRI = MF.getRegInfo(); const auto *TII = MF.getSubtarget().getInstrInfo(); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index e31ea46..b290b4b 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -45,8 +45,9 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-p:64:64-i64:64-n32:64-S128" - : "e-p:32:32-i64:64-n32:64-S128", + : LLVMTargetMachine(T, + TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128" + : "e-m:e-p:32:32-i64:64-n32:64-S128", TT, CPU, FS, Options, RM, CM, OL), TLOF(make_unique<WebAssemblyTargetObjectFile>()) { // WebAssembly type-checks expressions, but a noreturn function with a return diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt index 92ecde3..91b3fff 100644 --- a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -5,23 +5,6 @@ pr38151.c va-arg-22.c -# WebAssemblyRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator, int, unsigned int, llvm::RegScavenger *) const: Assertion `MI.getOperand(1).getImm() == 0 && "Can't eliminate FI yet if offset is already set"' -20030313-1.c -20030916-1.c -20031012-1.c -20041126-1.c -20060420-1.c -20071202-1.c -20120808-1.c -pr20527-1.c -pr27073.c -pr36339.c -pr37573.c -pr43236.c -pr43835.c -pr45070.c -pr51933.c - # TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed. struct-ret-1.c va-arg-11.c @@ -140,8 +123,6 @@ pr38051.c pr39100.c pr39339.c -pr40022.c -pr40657.c pr43987.c diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h index fbec662..01e65b8 100644 --- a/contrib/llvm/lib/Target/X86/X86.h +++ b/contrib/llvm/lib/Target/X86/X86.h @@ -29,7 +29,7 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel); /// This pass initializes a global base register for PIC on x86-32. -FunctionPass* createX86GlobalBaseRegPass(); +FunctionPass *createX86GlobalBaseRegPass(); /// This pass combines multiple accesses to local-dynamic TLS variables so that /// the TLS base address for the module is only fetched once per execution path @@ -49,12 +49,13 @@ FunctionPass *createX86IssueVZeroUpperPass(); /// This will prevent a stall when returning on the Atom. FunctionPass *createX86PadShortFunctions(); -/// Return a a pass that selectively replaces certain instructions (like add, +/// Return a pass that selectively replaces certain instructions (like add, /// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA /// instructions, in order to eliminate execution delays in some processors. 
FunctionPass *createX86FixupLEAs(); -/// Return a pass that removes redundant address recalculations. +/// Return a pass that removes redundant LEA instructions and redundant address +/// recalculations. FunctionPass *createX86OptimizeLEAs(); /// Return a pass that optimizes the code-size of x86 call sequences. This is diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td index 54d88cb..e8b96e7 100644 --- a/contrib/llvm/lib/Target/X86/X86CallingConv.td +++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td @@ -831,6 +831,12 @@ def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI, R8, R9, R10, R11)>; +// CSRs that are handled by prologue, epilogue. +def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>; + +// CSRs that are handled explicitly via copies. +def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>; + // All GPRs - except r11 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, R8, R9, R10, RSP)>; diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index 629d4d3..f48b479 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -1002,6 +1002,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + CallingConv::ID CC = F.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index d31aab0..1ec93b5 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -265,7 +265,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Without SSE, i64->f64 goes through memory. setOperationAction(ISD::BITCAST , MVT::i64 , Expand); } - } + } else if (!Subtarget->is64Bit()) + setOperationAction(ISD::BITCAST , MVT::i64 , Custom); // Scalar integer divide and remainder are lowered to use operations that // produce two results, to match the available instructions. This exposes @@ -2310,6 +2311,18 @@ X86TargetLowering::LowerReturn(SDValue Chain, DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); } + const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const MCPhysReg *I = + TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); + if (I) { + for (; *I; ++I) { + if (X86::GR64RegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::i64)); + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + } + } + RetOps[0] = Chain; // Update chain. // Add the flag if we have it. 
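In the LowerReturn hunk above, getCalleeSavedRegsViaCopy follows the usual LLVM convention for callee-saved register lists: it returns either null (when the split-CSR convention does not apply) or a plain array of register numbers terminated by 0, hence the "if (I)" guard and the "for (; *I; ++I)" walk. A minimal stand-in sketch of that idiom; the type alias and register numbers here are invented for illustration:

    #include <cstdint>
    #include <cstdio>

    using MCPhysRegStub = uint16_t; // stand-in for llvm::MCPhysReg

    // Hypothetical zero-terminated list in the style of the
    // CSR_64_CXX_TLS_Darwin_ViaCopy table; the register numbers are made up.
    static const MCPhysRegStub ViaCopyList[] = {48, 49, 50, 0};

    int main() {
      // Walk until the terminating 0, exactly as the hunk above does.
      for (const MCPhysRegStub *I = ViaCopyList; *I; ++I)
        std::printf("return preserves reg %u via explicit copy\n",
                    unsigned(*I));
      return 0;
    }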
@@ -3907,6 +3920,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::SHUFP: + case X86ISD::INSERTPS: case X86ISD::PALIGNR: case X86ISD::MOVLHPS: case X86ISD::MOVLHPD: @@ -4157,6 +4171,35 @@ static bool hasFPCMov(unsigned X86CC) { } } + +bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + unsigned Intrinsic) const { + + const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic); + if (!IntrData) + return false; + + switch (IntrData->Type) { + case LOADA: + case LOADU: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(I.getType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = (IntrData->Type == LOADA ? Info.memVT.getSizeInBits()/8 : 1); + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + return true; + } + default: + break; + } + + return false; +} + /// Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. @@ -4743,8 +4786,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, /// uses one source. Note that this will set IsUnary for shuffles which use a /// single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. -/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero. -static bool getTargetShuffleMask(SDNode *N, MVT VT, +static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, SmallVectorImpl<int> &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); SDValue ImmN; @@ -4761,6 +4803,11 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; + case X86ISD::INSERTPS: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); + break; case X86ISD::UNPCKH: DecodeUNPCKHMask(VT, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); @@ -4870,10 +4917,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); - // Mask only contains negative index if an element is zero. - if (std::any_of(Mask.begin(), Mask.end(), - [](int M){ return M == SM_SentinelZero; })) - return false; + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVSLDUP: DecodeMOVSLDUPMask(VT, Mask); @@ -5008,6 +5052,12 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (Mask.empty()) return false; + // Check if we're getting a shuffle mask with zero'd elements. + if (!AllowSentinelZero) + if (std::any_of(Mask.begin(), Mask.end(), + [](int M){ return M == SM_SentinelZero; })) + return false; + // If we have a fake unary shuffle, the shuffle mask is spread across two // inputs that are actually the same node. Re-map the mask to always point // into the first input. @@ -5046,19 +5096,19 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, // Recurse into target specific vector shuffles to find scalars. 
if (isTargetShuffle(Opcode)) { MVT ShufVT = V.getSimpleValueType(); - unsigned NumElems = ShufVT.getVectorNumElements(); + int NumElems = (int)ShufVT.getVectorNumElements(); SmallVector<int, 16> ShuffleMask; bool IsUnary; - if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary)) + if (!getTargetShuffleMask(N, ShufVT, false, ShuffleMask, IsUnary)) return SDValue(); int Elt = ShuffleMask[Index]; - if (Elt < 0) + if (Elt == SM_SentinelUndef) return DAG.getUNDEF(ShufVT.getVectorElementType()); - SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0) - : N->getOperand(1); + assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range"); + SDValue NewV = (Elt < NumElems) ? N->getOperand(0) : N->getOperand(1); return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } @@ -8165,6 +8215,13 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG)) return TruncBroadcast; + MVT BroadcastVT = VT; + + // Peek through any bitcast (only useful for loads). + SDValue BC = V; + while (BC.getOpcode() == ISD::BITCAST) + BC = BC.getOperand(0); + // Also check the simpler case, where we can directly reuse the scalar. if (V.getOpcode() == ISD::BUILD_VECTOR || (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) { @@ -8174,13 +8231,17 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, // Only AVX2 has register broadcasts. if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V)) return SDValue(); - } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) { + } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) { + // 32-bit targets need to load i64 as a f64 and then bitcast the result. + if (!Subtarget->is64Bit() && VT.getScalarType() == MVT::i64) + BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements()); + // If we are broadcasting a load that is only used by the shuffle // then we can reduce the vector load to the broadcasted scalar load. - LoadSDNode *Ld = cast<LoadSDNode>(V); + LoadSDNode *Ld = cast<LoadSDNode>(BC); SDValue BaseAddr = Ld->getOperand(1); EVT AddrVT = BaseAddr.getValueType(); - EVT SVT = VT.getScalarType(); + EVT SVT = BroadcastVT.getScalarType(); unsigned Offset = BroadcastIdx * SVT.getStoreSize(); SDValue NewAddr = DAG.getNode( ISD::ADD, DL, AddrVT, BaseAddr, @@ -8194,7 +8255,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, return SDValue(); } - return DAG.getNode(X86ISD::VBROADCAST, DL, VT, V); + V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V); + return DAG.getBitcast(VT, V); } // Check for whether we can use INSERTPS to perform the shuffle. We only use @@ -12474,8 +12536,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // location. SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, DL, true), DL); SDValue Args[] = { Chain, Offset }; Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args); + Chain = + DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), + DAG.getIntPtrConstant(0, DL, true), SDValue(), DL); // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. 
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); @@ -12648,13 +12714,21 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, return Op; } + SDValue ValueToStore = Op.getOperand(0); + if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) && + !Subtarget->is64Bit()) + // Bitcasting to f64 here allows us to do a single 64-bit store from + // an SSE register, avoiding the store forwarding penalty that would come + // with two 32-bit stores. + ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); + unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Chain = DAG.getStore( - DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, + DAG.getEntryNode(), dl, ValueToStore, StackSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); @@ -13027,7 +13101,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, } assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + SDValue ValueToStore = Op.getOperand(0); + if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit()) + // Bitcasting to f64 here allows us to do a single 64-bit store from + // an SSE register, avoiding the store forwarding penalty that would come + // with two 32-bit stores. + ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore, StackSlot, MachinePointerInfo(), false, false, 0); // For i64 source, we need to add the appropriate power of 2 if the input @@ -17487,7 +17567,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMergeValues(Results, dl); } case COMPRESS_TO_MEM: { - SDLoc dl(Op); SDValue Mask = Op.getOperand(4); SDValue DataToCompress = Op.getOperand(3); SDValue Addr = Op.getOperand(2); @@ -17513,7 +17592,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, case TRUNCATE_TO_MEM_VI32: return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32); case EXPAND_FROM_MEM: { - SDLoc dl(Op); SDValue Mask = Op.getOperand(4); SDValue PassThru = Op.getOperand(3); SDValue Addr = Op.getOperand(2); @@ -17533,6 +17611,25 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, Mask, PassThru, Subtarget, DAG), Chain}; return DAG.getMergeValues(Results, dl); } + case LOADU: + case LOADA: { + SDValue Mask = Op.getOperand(4); + SDValue PassThru = Op.getOperand(3); + SDValue Addr = Op.getOperand(2); + SDValue Chain = Op.getOperand(0); + MVT VT = Op.getSimpleValueType(); + + MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op); + assert(MemIntr && "Expected MemIntrinsicSDNode!"); + + if (isAllOnesConstant(Mask)) // return just a load + return DAG.getLoad(VT, dl, Chain, Addr, MemIntr->getMemOperand()); + + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT, + MemIntr->getMemOperand(), ISD::NON_EXTLOAD); + } } } @@ -19512,24 +19609,37 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, MVT SrcVT = Op.getOperand(0).getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); - if (SrcVT 
== MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) { + if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || + SrcVT == MVT::i64) { assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); if (DstVT != MVT::f64) // This conversion needs to be expanded. return SDValue(); - SDValue InVec = Op->getOperand(0); - SDLoc dl(Op); - unsigned NumElts = SrcVT.getVectorNumElements(); - MVT SVT = SrcVT.getVectorElementType(); - - // Widen the vector in input in the case of MVT::v2i32. - // Example: from MVT::v2i32 to MVT::v4i32. + SDValue Op0 = Op->getOperand(0); SmallVector<SDValue, 16> Elts; - for (unsigned i = 0, e = NumElts; i != e; ++i) - Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec, - DAG.getIntPtrConstant(i, dl))); - + SDLoc dl(Op); + unsigned NumElts; + MVT SVT; + if (SrcVT.isVector()) { + NumElts = SrcVT.getVectorNumElements(); + SVT = SrcVT.getVectorElementType(); + + // Widen the vector in input in the case of MVT::v2i32. + // Example: from MVT::v2i32 to MVT::v4i32. + for (unsigned i = 0, e = NumElts; i != e; ++i) + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0, + DAG.getIntPtrConstant(i, dl))); + } else { + assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() && + "Unexpected source type in LowerBITCAST"); + Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0, + DAG.getIntPtrConstant(0, dl))); + Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0, + DAG.getIntPtrConstant(1, dl))); + NumElts = 2; + SVT = MVT::i32; + } // Explicitly mark the extra elements as Undef. Elts.append(NumElts, DAG.getUNDEF(SVT)); @@ -20685,6 +20795,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VSHLI: return "X86ISD::VSHLI"; case X86ISD::VSRLI: return "X86ISD::VSRLI"; case X86ISD::VSRAI: return "X86ISD::VSRAI"; + case X86ISD::VROTLI: return "X86ISD::VROTLI"; + case X86ISD::VROTRI: return "X86ISD::VROTRI"; case X86ISD::CMPP: return "X86ISD::CMPP"; case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; @@ -23184,7 +23296,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, return false; SmallVector<int, 16> OpMask; bool IsUnary; - bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary); + bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, true, OpMask, IsUnary); // We only can combine unary shuffles which we can decode the mask for. if (!HaveMask || !IsUnary) return false; @@ -23281,7 +23393,7 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) { MVT VT = N.getSimpleValueType(); SmallVector<int, 4> Mask; bool IsUnary; - bool HaveMask = getTargetShuffleMask(N.getNode(), VT, Mask, IsUnary); + bool HaveMask = getTargetShuffleMask(N.getNode(), VT, false, Mask, IsUnary); (void)HaveMask; assert(HaveMask); @@ -23854,6 +23966,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, SDValue InVec = N->getOperand(0); SDValue EltNo = N->getOperand(1); + EVT EltVT = N->getValueType(0); if (!isa<ConstantSDNode>(EltNo)) return SDValue(); @@ -23882,14 +23995,22 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, SmallVector<int, 16> ShuffleMask; bool UnaryShuffle; - if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), + if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true, ShuffleMask, UnaryShuffle)) return SDValue(); // Select the input vector, guarding against out of range extract vector. 
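The sentinel handling that follows is easier to read with the shuffle-mask convention spelled out: a resolved mask entry is either a source index in [0, 2*NumElems) spanning both shuffle inputs, or a negative sentinel meaning "undef" or "known zero". A minimal stand-alone sketch of that selection logic in plain C++ (not the SelectionDAG API; SM_SentinelUndef/SM_SentinelZero here only mirror LLVM's convention):

    #include <cassert>
    #include <optional>
    #include <vector>

    constexpr int SM_SentinelUndef = -1; // lane is undefined
    constexpr int SM_SentinelZero = -2;  // lane is known to be zero

    // Resolve "extract element Elt of shuffle(V0, V1, Mask)" to a value;
    // nullopt models folding the extract to undef.
    std::optional<int> extractThroughShuffle(const std::vector<int> &V0,
                                             const std::vector<int> &V1,
                                             const std::vector<int> &Mask,
                                             unsigned Elt) {
      int Idx = Elt >= Mask.size() ? SM_SentinelUndef : Mask[Elt];
      if (Idx == SM_SentinelZero)
        return 0;                  // fold to a constant zero
      if (Idx == SM_SentinelUndef)
        return std::nullopt;       // fold to undef
      const int NumElems = (int)V0.size();
      assert(Idx >= 0 && Idx < 2 * NumElems && "shuffle index out of range");
      // Select the first or second shuffle input, as the code below does.
      return Idx < NumElems ? V0[Idx] : V1[Idx - NumElems];
    }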
unsigned NumElems = CurrentVT.getVectorNumElements(); int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt]; + int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt]; + + if (Idx == SM_SentinelZero) + return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT) + : DAG.getConstantFP(+0.0, SDLoc(N), EltVT); + if (Idx == SM_SentinelUndef) + return DAG.getUNDEF(EltVT); + + assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range"); SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); @@ -23914,7 +24035,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) return SDValue(); - EVT EltVT = N->getValueType(0); // If there's a bitcast before the shuffle, check if the load type and // alignment is valid. unsigned Align = LN0->getAlignment(); @@ -27233,6 +27353,32 @@ static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags); } +/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) -> +/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y) +/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly +/// extends from AH (which we otherwise need to do contortions to access). +static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + auto OpcodeN = N->getOpcode(); + auto OpcodeN0 = N0.getOpcode(); + if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) || + (OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM))) + return SDValue(); + + EVT VT = N->getValueType(0); + EVT InVT = N0.getValueType(); + if (N0.getResNo() != 1 || InVT != MVT::i8 || VT != MVT::i32) + return SDValue(); + + SDVTList NodeTys = DAG.getVTList(MVT::i8, VT); + auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG + : X86ISD::UDIVREM8_ZEXT_HREG; + SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0), + N0.getOperand(1)); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0)); + return R.getValue(1); +} + static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -27243,18 +27389,8 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, EVT InSVT = InVT.getScalarType(); SDLoc DL(N); - // (i8,i32 sext (sdivrem (i8 x, i8 y)) -> - // (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y) - // This exposes the sext to the sdivrem lowering, so that it directly extends - // from AH (which we otherwise need to do contortions to access). 
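For reference, the semantics the [SU]DIVREM8_[SZ]EXT_HREG nodes capture: the x86 8-bit divide leaves the quotient in AL and the remainder in AH, and folding the extension into the node lets the remainder be sign- or zero-extended directly out of AH instead of being shuffled through another register first. A small host-side illustration of the values involved (plain C++, no LLVM types; C's truncating division matches IDIV here):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t X = -7, Y = 3;
      int8_t Quot = X / Y;       // would land in AL after IDIV: -2
      int8_t Rem = X % Y;        // would land in AH after IDIV: -1
      int32_t RemSExt = Rem;     // the sext that SDIVREM8_SEXT_HREG folds in
      std::printf("quot=%d rem=%d rem_sext=%d\n", Quot, Rem, RemSExt);
      return 0;
    }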
- if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 && - InVT == MVT::i8 && VT == MVT::i32) { - SDVTList NodeTys = DAG.getVTList(MVT::i8, VT); - SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, DL, NodeTys, - N0.getOperand(0), N0.getOperand(1)); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0)); - return R.getValue(1); - } + if (SDValue DivRem8 = getDivRem8(N, DAG)) + return DivRem8; if (!DCI.isBeforeLegalizeOps()) { if (InVT == MVT::i1) { @@ -27413,19 +27549,8 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; - // (i8,i32 zext (udivrem (i8 x, i8 y)) -> - // (i8,i32 (udivrem_zext_hreg (i8 x, i8 y) - // This exposes the zext to the udivrem lowering, so that it directly extends - // from AH (which we otherwise need to do contortions to access). - if (N0.getOpcode() == ISD::UDIVREM && - N0.getResNo() == 1 && N0.getValueType() == MVT::i8 && - VT == MVT::i32) { - SDVTList NodeTys = DAG.getVTList(MVT::i8, VT); - SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys, - N0.getOperand(0), N0.getOperand(1)); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0)); - return R.getValue(1); - } + if (SDValue DivRem8 = getDivRem8(N, DAG)) + return DivRem8; return SDValue(); } @@ -27923,7 +28048,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); -// TODO: refactor the [SU]DIVREM8_[SZ]EXT_HREG code so that it's not duplicated. case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); @@ -28763,3 +28887,51 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const { Attribute::MinSize); return OptSize && !VT.isVector(); } + +void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { + if (!Subtarget->is64Bit()) + return; + + // Update IsSplitCSR in X86MachineFunctionInfo. + X86MachineFunctionInfo *AFI = + Entry->getParent()->getInfo<X86MachineFunctionInfo>(); + AFI->setIsSplitCSR(true); +} + +void X86TargetLowering::insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const { + const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); + if (!IStart) + return; + + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + for (const MCPhysReg *I = IStart; *I; ++I) { + const TargetRegisterClass *RC = nullptr; + if (X86::GR64RegClass.contains(*I)) + RC = &X86::GR64RegClass; + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + + unsigned NewVR = MRI->createVirtualRegister(RC); + // Create copy from CSR to a virtual register. + // FIXME: this currently does not emit CFI pseudo-instructions, it works + // fine for CXX_FAST_TLS since the C++-style TLS access functions should be + // nounwind. If we want to generalize this later, we may need to emit + // CFI pseudo-instructions. 
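The copies built below implement the split-CSR scheme gated on CXX_FAST_TLS: rather than the usual prologue/epilogue spills, each callee-saved GR64 is copied into a fresh virtual register at function entry and copied back before every return, leaving placement (or elision) of the saves to the register allocator. A rough conceptual sketch of the resulting shape, with hypothetical names standing in for registers (ordinary C++, not MIR or the LLVM API):

    #include <cstdint>

    struct Regs { uint64_t RBX, R12; }; // stand-ins for two callee-saved registers

    uint64_t body(Regs &R) { R.RBX = 0; R.R12 = 0; return 42; } // may clobber CSRs

    uint64_t callee(Regs &R) {
      uint64_t SavedRBX = R.RBX; // entry block: COPY %csr -> %vreg
      uint64_t SavedR12 = R.R12;
      uint64_t Ret = body(R);
      R.RBX = SavedRBX;          // every exit block: COPY %vreg -> %csr
      R.R12 = SavedR12;
      return Ret;
    }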
+ assert(Entry->getParent()->getFunction()->hasFnAttribute( + Attribute::NoUnwind) && + "Function should be nounwind in insertCopiesSplitCSR!"); + Entry->addLiveIn(*I); + BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), + NewVR) + .addReg(*I); + + for (auto *Exit : Exits) + BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), + *I) + .addReg(NewVR); + } +} diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index 8bb0e5f..0ab786e 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -316,6 +316,9 @@ namespace llvm { // Vector shift elements by immediate VSHLI, VSRLI, VSRAI, + // Bit rotate by immediate + VROTLI, VROTRI, + // Vector packed double/float comparison. CMPP, @@ -837,6 +840,13 @@ namespace llvm { /// from i32 to i8 but not from i32 to i16. bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; + /// Given an intrinsic, checks if on the target the intrinsic will need to map + /// to a MemIntrinsicNode (touches memory). If this is the case, it returns + /// true and stores the intrinsic information into the IntrinsicInfo that was + /// passed to the function. + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + unsigned Intrinsic) const override; + /// Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. @@ -1057,6 +1067,15 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, SelectionDAG &DAG) const override; + bool supportSplitCSR(MachineFunction *MF) const override { + return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + } + void initializeSplitCSR(MachineBasicBlock *Entry) const override; + void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; + bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td index 0a27c33..49be648 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td @@ -188,7 +188,7 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F, let isCommutable = IsCommutable in def NAME: AVX512<O, F, Outs, Ins, OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# - "$dst , "#IntelSrcAsm#"}", + "$dst, "#IntelSrcAsm#"}", Pattern, itin>; // Prefer over VMOV*rrk Pat<> @@ -323,18 +323,16 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, list<dag> Pattern, - list<dag> MaskingPattern, - string Round = "", - InstrItinClass itin = NoItinerary> { + list<dag> MaskingPattern> { def NAME: AVX512<O, F, Outs, Ins, - OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"# - "$dst "#Round#", "#IntelSrcAsm#"}", - Pattern, itin>; + OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# + "$dst, "#IntelSrcAsm#"}", + Pattern, NoItinerary>; def NAME#k: AVX512<O, F, Outs, MaskingIns, - OpcodeStr#"\t{"#Round#AttSrcAsm#", $dst {${mask}}|"# - "$dst {${mask}}, "#IntelSrcAsm#Round#"}", - MaskingPattern, itin>, EVEX_K; + OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"# + "$dst {${mask}}, "#IntelSrcAsm#"}", + MaskingPattern, NoItinerary>, EVEX_K; } multiclass 
AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _, @@ -342,33 +340,27 @@ multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _, dag Ins, dag MaskingIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, dag MaskingRHS, - string Round = "", - InstrItinClass itin = NoItinerary> : + dag RHS, dag MaskingRHS> : AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr, AttSrcAsm, IntelSrcAsm, [(set _.KRC:$dst, RHS)], - [(set _.KRC:$dst, MaskingRHS)], - Round, NoItinerary>; + [(set _.KRC:$dst, MaskingRHS)]>; multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, string Round = "", - InstrItinClass itin = NoItinerary> : + dag RHS> : AVX512_maskable_common_cmp<O, F, _, Outs, Ins, !con((ins _.KRCWM:$mask), Ins), OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, - (and _.KRCWM:$mask, RHS), - Round, itin>; + (and _.KRCWM:$mask, RHS)>; multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm> : AVX512_maskable_custom_cmp<O, F, Outs, Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr, - AttSrcAsm, IntelSrcAsm, - [],[],"", NoItinerary>; + AttSrcAsm, IntelSrcAsm, [],[]>; // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion @@ -1294,7 +1286,7 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> { def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, - "\t{$src2, $src1, ${dst} |${dst}, $src1, $src2}"), + "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, EVEX_4V; def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), @@ -1311,7 +1303,7 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> { def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, - "\t{$src2, $src1, ${dst} |${dst}, $src1, $src2}"), + "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), @@ -1426,7 +1418,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd> (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), "vcmp${cc}"#_.Suffix, - "{sae}, $src2, $src1", "$src1, $src2,{sae}", + "{sae}, $src2, $src1", "$src1, $src2, {sae}", (OpNodeRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc, @@ -1449,7 +1441,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd> (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">, + "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">, EVEX_4V, EVEX_B; }// let isAsmParserOnly = 1, hasSideEffects = 0 @@ -1831,7 +1823,7 @@ multiclass avx512_vcmp_sae<X86VectorVTInfo _> { defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), "vcmp${cc}"#_.Suffix, - "{sae}, $src2, $src1", "$src1, $src2,{sae}", + "{sae}, $src2, $src1", "$src1, $src2, {sae}", (X86cmpmRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc, @@ -1842,8 +1834,8 @@ multiclass avx512_vcmp_sae<X86VectorVTInfo _> { (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc,{sae}, $src2, $src1", - 
"$src1, $src2,{sae}, $cc">, EVEX_B; + "$cc, {sae}, $src2, $src1", + "$src1, $src2, {sae}, $cc">, EVEX_B; } } @@ -1889,13 +1881,13 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [prd] in { def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),//_.KRC:$dst), (ins _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}", + OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1), (i32 imm:$src2)))], NoItinerary>; def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix# - "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}", + "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (i32 imm:$src2))))], NoItinerary>, EVEX_K; @@ -1903,14 +1895,14 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix## - "\t{$src2, $src1, $dst | $dst, $src1, $src2}", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst, (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2)))], NoItinerary>; def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix## - "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}", + "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2))))], NoItinerary>, EVEX_K; @@ -1925,13 +1917,13 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, string mem, string broadcast>{ def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}", + OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1), (i32 imm:$src2)))], NoItinerary>; def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix# - "\t{$src2, $src1, $dst {${mask}}| $dst {${mask}}, $src1, $src2}", + "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (i32 imm:$src2))))], NoItinerary>, EVEX_K; @@ -1939,21 +1931,21 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##mem# - "\t{$src2, $src1, $dst | $dst, $src1, $src2}", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2)))], NoItinerary>; def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##mem# - "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}", + "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2))))], NoItinerary>, EVEX_K; def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.ScalarMemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"## - _.BroadcastStr##", $dst | $dst, ${src1}" + 
_.BroadcastStr##", $dst|$dst, ${src1}" ##_.BroadcastStr##", $src2}", [(set _.KRC:$dst,(OpNode (_.VT (X86VBroadcast @@ -1962,7 +1954,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"## - _.BroadcastStr##", $dst {${mask}} | $dst {${mask}}, ${src1}"## + _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"## _.BroadcastStr##", $src2}", [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode (_.VT (X86VBroadcast @@ -2715,30 +2707,6 @@ defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>, avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; -def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr, - (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)), - (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>; - -def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr, - (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)), - (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>; - -def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr, - (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)), - (VMOVAPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>; - -def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr, - (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)), - (VMOVAPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>; - -def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr, - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VMOVAPDZrm addr:$ptr)>; - -def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr, - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), - (VMOVAPSZrm addr:$ptr)>; - def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src), GR16:$mask), (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), @@ -4088,8 +4056,8 @@ defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>, defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>, avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V; -defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>, AVX512BIi8Base, EVEX_4V; -defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>, AVX512BIi8Base, EVEX_4V; +defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri>, AVX512BIi8Base, EVEX_4V; +defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>; defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>; @@ -6057,12 +6025,12 @@ multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode, let mayStore = 1 in { def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, SrcInfo.RC:$src), - OpcodeStr # "\t{$src, $dst |$dst, $src}", + OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, EVEX; def mrk : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), - OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", + OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, EVEX, EVEX_K; }//mayStore = 1 } @@ -6666,12 +6634,12 @@ multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _, let mayStore = 1 in { def mr : AVX5128I<opc, MRMDestMem, (outs), (ins 
_.MemOp:$dst, _.RC:$src), - OpcodeStr # "\t{$src, $dst |$dst, $src}", + OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, EVEX_CD8<_.EltSize, CD8VT1>; def mrk : AVX5128I<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), - OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", + OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", [(store (_.VT (vselect _.KRCWM:$mask, (_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)), addr:$dst)]>, @@ -6766,7 +6734,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix, "$src2,{sae}, $src1", + OpcodeStr##_.Suffix, "$src2, {sae}, $src1", "$src1, {sae}, $src2", (OpNode (_.VT _.RC:$src1), (i32 imm:$src2), @@ -6895,8 +6863,8 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), - OpcodeStr, "$src3,{sae}, $src2, $src1", - "$src1, $src2,{sae}, $src3", + OpcodeStr, "$src3, {sae}, $src2, $src1", + "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$src3), @@ -6907,8 +6875,8 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), - OpcodeStr, "$src3,{sae}, $src2, $src1", - "$src1, $src2,{sae}, $src3", + OpcodeStr, "$src3, {sae}, $src2, $src1", + "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$src3), diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td index c4b2d6d..af43d9f 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td +++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td @@ -98,22 +98,22 @@ let hasSideEffects = 0, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX", - [], IIC_MOVZX>, TB, Sched<[WriteALU]>; + [], IIC_MOVZX>, TB, OpSize32, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX", - [], IIC_MOVZX>, TB, Sched<[WriteALULd]>; + [], IIC_MOVZX>, TB, OpSize32, Sched<[WriteALULd]>; def MOVSX32_NOREXrr8 : I<0xBE, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX", - [], IIC_MOVSX>, TB, Sched<[WriteALU]>; + [], IIC_MOVSX>, TB, OpSize32, Sched<[WriteALU]>; let mayLoad = 1 in def MOVSX32_NOREXrm8 : I<0xBE, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX", - [], IIC_MOVSX>, TB, Sched<[WriteALULd]>; + [], IIC_MOVSX>, TB, OpSize32, Sched<[WriteALULd]>; } // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -146,18 +146,22 @@ def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), Sched<[WriteALULd]>, Requires<[In64BitMode]>; // movzbq and movzwq encodings for the disassembler -def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), - "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB, Sched<[WriteALU]>; -def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), - "movz{bq|x}\t{$src, 
$dst|$dst, $src}", [], IIC_MOVZX>, - TB, Sched<[WriteALULd]>; -def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB, Sched<[WriteALU]>; -def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB, Sched<[WriteALULd]>; +let hasSideEffects = 0 in { +def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), + "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB, Sched<[WriteALU]>; +let mayLoad = 1 in +def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), + "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB, Sched<[WriteALULd]>; +def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), + "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB, Sched<[WriteALU]>; +let mayLoad = 1 in +def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB, Sched<[WriteALULd]>; +} // 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a // 32-bit register. diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 829cedd..6432863 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -225,6 +225,9 @@ def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>; def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>; def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>; +def X86vrotli : SDNode<"X86ISD::VROTLI", SDTIntShiftOp>; +def X86vrotri : SDNode<"X86ISD::VROTRI", SDTIntShiftOp>; + def X86vprot : SDNode<"X86ISD::VPROT", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index ea8e562..9c8339a 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -1273,7 +1273,7 @@ def STOSW : I<0xAB, RawFrmDst, (outs dstidx16:$dst), (ins), let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in def STOSL : I<0xAB, RawFrmDst, (outs dstidx32:$dst), (ins), "stos{l|d}\t{%eax, $dst|$dst, eax}", [], IIC_STOS>, OpSize32; -let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in +let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in def STOSQ : RI<0xAB, RawFrmDst, (outs dstidx64:$dst), (ins), "stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>; @@ -2755,56 +2755,56 @@ def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>; // Disambiguate the mem/imm form of bt-without-a-suffix as btl. // Likewise for btc/btr/bts. -def : InstAlias<"bt {$imm, $mem|$mem, $imm}", +def : InstAlias<"bt\t{$imm, $mem|$mem, $imm}", (BT32mi8 i32mem:$mem, i32i8imm:$imm), 0>; -def : InstAlias<"btc {$imm, $mem|$mem, $imm}", +def : InstAlias<"btc\t{$imm, $mem|$mem, $imm}", (BTC32mi8 i32mem:$mem, i32i8imm:$imm), 0>; -def : InstAlias<"btr {$imm, $mem|$mem, $imm}", +def : InstAlias<"btr\t{$imm, $mem|$mem, $imm}", (BTR32mi8 i32mem:$mem, i32i8imm:$imm), 0>; -def : InstAlias<"bts {$imm, $mem|$mem, $imm}", +def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}", (BTS32mi8 i32mem:$mem, i32i8imm:$imm), 0>; // clr aliases. 
-def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>; -def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg), 0>; -def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg), 0>; -def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg), 0>; +def : InstAlias<"clrb\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>; +def : InstAlias<"clrw\t$reg", (XOR16rr GR16:$reg, GR16:$reg), 0>; +def : InstAlias<"clrl\t$reg", (XOR32rr GR32:$reg, GR32:$reg), 0>; +def : InstAlias<"clrq\t$reg", (XOR64rr GR64:$reg, GR64:$reg), 0>; // lods aliases. Accept the destination being omitted because it's implicit // in the mnemonic, or the mnemonic suffix being omitted because it's implicit // in the destination. -def : InstAlias<"lodsb $src", (LODSB srcidx8:$src), 0>; -def : InstAlias<"lodsw $src", (LODSW srcidx16:$src), 0>; -def : InstAlias<"lods{l|d} $src", (LODSL srcidx32:$src), 0>; -def : InstAlias<"lodsq $src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; -def : InstAlias<"lods {$src, %al|al, $src}", (LODSB srcidx8:$src), 0>; -def : InstAlias<"lods {$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>; -def : InstAlias<"lods {$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>; -def : InstAlias<"lods {$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; +def : InstAlias<"lodsb\t$src", (LODSB srcidx8:$src), 0>; +def : InstAlias<"lodsw\t$src", (LODSW srcidx16:$src), 0>; +def : InstAlias<"lods{l|d}\t$src", (LODSL srcidx32:$src), 0>; +def : InstAlias<"lodsq\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; +def : InstAlias<"lods\t{$src, %al|al, $src}", (LODSB srcidx8:$src), 0>; +def : InstAlias<"lods\t{$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>; +def : InstAlias<"lods\t{$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>; +def : InstAlias<"lods\t{$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; // stos aliases. Accept the source being omitted because it's implicit in // the mnemonic, or the mnemonic suffix being omitted because it's implicit // in the source. -def : InstAlias<"stosb $dst", (STOSB dstidx8:$dst), 0>; -def : InstAlias<"stosw $dst", (STOSW dstidx16:$dst), 0>; -def : InstAlias<"stos{l|d} $dst", (STOSL dstidx32:$dst), 0>; -def : InstAlias<"stosq $dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; -def : InstAlias<"stos {%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>; -def : InstAlias<"stos {%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>; -def : InstAlias<"stos {%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>; -def : InstAlias<"stos {%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"stosb\t$dst", (STOSB dstidx8:$dst), 0>; +def : InstAlias<"stosw\t$dst", (STOSW dstidx16:$dst), 0>; +def : InstAlias<"stos{l|d}\t$dst", (STOSL dstidx32:$dst), 0>; +def : InstAlias<"stosq\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"stos\t{%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>; +def : InstAlias<"stos\t{%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>; +def : InstAlias<"stos\t{%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>; +def : InstAlias<"stos\t{%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; // scas aliases. Accept the destination being omitted because it's implicit // in the mnemonic, or the mnemonic suffix being omitted because it's implicit // in the destination. 
-def : InstAlias<"scasb $dst", (SCASB dstidx8:$dst), 0>; -def : InstAlias<"scasw $dst", (SCASW dstidx16:$dst), 0>; -def : InstAlias<"scas{l|d} $dst", (SCASL dstidx32:$dst), 0>; -def : InstAlias<"scasq $dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; -def : InstAlias<"scas {$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>; -def : InstAlias<"scas {$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>; -def : InstAlias<"scas {$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>; -def : InstAlias<"scas {$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"scasb\t$dst", (SCASB dstidx8:$dst), 0>; +def : InstAlias<"scasw\t$dst", (SCASW dstidx16:$dst), 0>; +def : InstAlias<"scas{l|d}\t$dst", (SCASL dstidx32:$dst), 0>; +def : InstAlias<"scasq\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"scas\t{$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>; +def : InstAlias<"scas\t{$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>; +def : InstAlias<"scas\t{$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>; +def : InstAlias<"scas\t{$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; // div and idiv aliases for explicit A register. def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8r GR8 :$src)>; @@ -2892,30 +2892,30 @@ def : InstAlias<"fnstsw" , (FNSTSW16r)>; // lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but // this is compatible with what GAS does. -def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>; -def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>; -def : InstAlias<"lcall {*}$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; -def : InstAlias<"ljmp {*}$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; -def : InstAlias<"lcall $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; -def : InstAlias<"ljmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; -def : InstAlias<"lcall {*}$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>; -def : InstAlias<"ljmp {*}$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>; - -def : InstAlias<"call {*}$dst", (CALL64m i64mem:$dst), 0>, Requires<[In64BitMode]>; -def : InstAlias<"jmp {*}$dst", (JMP64m i64mem:$dst), 0>, Requires<[In64BitMode]>; -def : InstAlias<"call {*}$dst", (CALL32m i32mem:$dst), 0>, Requires<[In32BitMode]>; -def : InstAlias<"jmp {*}$dst", (JMP32m i32mem:$dst), 0>, Requires<[In32BitMode]>; -def : InstAlias<"call {*}$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>; -def : InstAlias<"jmp {*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>; +def : InstAlias<"lcall\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"ljmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"lcall\t{*}$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"ljmp\t{*}$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"lcall\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; +def : InstAlias<"ljmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; +def : InstAlias<"lcall\t{*}$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>; +def : InstAlias<"ljmp\t{*}$dst", (FARJMP16m opaque32mem:$dst), 0>, 
Requires<[In16BitMode]>; + +def : InstAlias<"call\t{*}$dst", (CALL64m i64mem:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"jmp\t{*}$dst", (JMP64m i64mem:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"call\t{*}$dst", (CALL32m i32mem:$dst), 0>, Requires<[In32BitMode]>; +def : InstAlias<"jmp\t{*}$dst", (JMP32m i32mem:$dst), 0>, Requires<[In32BitMode]>; +def : InstAlias<"call\t{*}$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>; +def : InstAlias<"jmp\t{*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>; // "imul <imm>, B" is an alias for "imul <imm>, B, B". -def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>; -def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>; -def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>; -def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>; -def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>; -def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>; +def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>; +def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>; +def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>; +def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>; +def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>; +def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>; // inb %dx -> inb %al, %dx def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>; @@ -2927,46 +2927,46 @@ def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>; // jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp -def : InstAlias<"call $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; -def : InstAlias<"jmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; -def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>; -def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>; -def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>; -def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>; -def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>; -def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"call\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; +def : InstAlias<"jmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; +def : InstAlias<"call\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>; +def : InstAlias<"jmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>; +def : InstAlias<"callw\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>; +def : InstAlias<"jmpw\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>; +def : InstAlias<"calll\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"jmpl\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; // Force mov without a suffix with a segment and mem to prefer the 'l' form of // the move. 
All segment/mem forms are equivalent, this has the shortest // encoding. -def : InstAlias<"mov {$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>; -def : InstAlias<"mov {$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>; +def : InstAlias<"mov\t{$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>; +def : InstAlias<"mov\t{$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>; // Match 'movq <largeimm>, <reg>' as an alias for movabsq. -def : InstAlias<"movq {$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>; +def : InstAlias<"movq\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>; // Match 'movq GR64, MMX' as an alias for movd. -def : InstAlias<"movq {$src, $dst|$dst, $src}", +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>; -def : InstAlias<"movq {$src, $dst|$dst, $src}", +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>; // movsx aliases -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>; -def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>; +def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>; // movzx aliases -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>; -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>; -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>; -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>; -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>; -def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr8 GR64:$dst, GR8:$src), 0>; +def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr16 GR64:$dst, GR16:$src), 0>; // Note: No GR32->GR64 movzx form. 
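The trailing note deserves its one-line justification: on x86-64, any write to a 32-bit register implicitly zeroes bits 63:32 of the full register, so a plain 32-bit mov already performs the GR32->GR64 zero-extension and no movzx encoding exists for it. A minimal check in standard C++ (the "compiles to a single 32-bit mov" claim assumes an optimizing x86-64 compiler):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Dirty = ~0ULL;       // all 64 bits set
      uint32_t Low = 0x12345678u;
      Dirty = Low;                  // one movl; upper 32 bits implicitly zeroed
      std::printf("%#llx\n", (unsigned long long)Dirty); // prints 0x12345678
      return 0;
    }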
// outb %dx -> outb %al, %dx diff --git a/contrib/llvm/lib/Target/X86/X86InstrMPX.td b/contrib/llvm/lib/Target/X86/X86InstrMPX.td index 31608cd..71ab973 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrMPX.td +++ b/contrib/llvm/lib/Target/X86/X86InstrMPX.td @@ -15,10 +15,10 @@ multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> { def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins i32mem:$src), - OpcodeStr#" \t{$src, $dst|$dst, $src}", []>, + OpcodeStr#"\t{$src, $dst|$dst, $src}", []>, Requires<[HasMPX, Not64BitMode]>; def 64rm: RI<opc, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), - OpcodeStr#" \t{$src, $dst|$dst, $src}", []>, + OpcodeStr#"\t{$src, $dst|$dst, $src}", []>, Requires<[HasMPX, In64BitMode]>; } @@ -26,16 +26,16 @@ defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS; multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> { def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i32mem:$src2), - OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>, Requires<[HasMPX, Not64BitMode]>; def 64rm: RI<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i64mem:$src2), - OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>, Requires<[HasMPX, In64BitMode]>; def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2), - OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>, Requires<[HasMPX, Not64BitMode]>; def 64rr: RI<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2), - OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>, Requires<[HasMPX, In64BitMode]>; } defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS; @@ -43,28 +43,28 @@ defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD; defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD; def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX]>; def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, Not64BitMode]>; def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, In64BitMode]>; def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX]>; def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs i64mem:$dst), (ins BNDR:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, Not64BitMode]>; def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs i128mem:$dst), (ins BNDR:$src), - "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, In64BitMode]>; def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), - "bndstx \t{$src, $dst|$dst, $src}", []>, PS, + "bndstx\t{$src, $dst|$dst, $src}", []>, PS, Requires<[HasMPX]>; def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), - "bndldx \t{$src, $dst|$dst, $src}", []>, PS, + "bndldx\t{$src, $dst|$dst, $src}", []>, PS, Requires<[HasMPX]>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index 624b931..6a7c456 100644 --- 
a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -1808,7 +1808,7 @@ def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>; def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", - (CVTSD2SI64rm GR64:$dst, sdmem:$src)>; + (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>; /// SSE 2 Only @@ -7838,9 +7838,7 @@ class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC, AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>, - Sched<[Sched]>, VEX { - let mayLoad = 1; -} + Sched<[Sched]>, VEX; // AVX2 adds register forms class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC, @@ -7871,7 +7869,7 @@ let ExeDomain = SSEPackedDouble in def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256, v4f64, v2f64, WriteFShuffle256>, VEX_L; -let mayLoad = 1, Predicates = [HasAVX2] in +let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vbroadcasti128\t{$src, $dst|$dst, $src}", []>, @@ -8259,6 +8257,9 @@ let Predicates = [HasF16C] in { (VCVTPH2PSrm addr:$src)>; def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)), (VCVTPH2PSrm addr:$src)>; + def : Pat<(int_x86_vcvtph2ps_128 (bitconvert + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (VCVTPH2PSrm addr:$src)>; def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16 (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 646b556..b525d5e 100644 --- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -29,7 +29,7 @@ enum IntrinsicType { INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, - EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC, + EXPAND_FROM_MEM, LOADA, LOADU, BLEND, INSERT_SUBVEC, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK }; @@ -143,6 +143,18 @@ static const IntrinsicData IntrinsicsWithChain[] = { EXPAND_FROM_MEM, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512, EXPAND_FROM_MEM, X86ISD::EXPAND, 0), + X86_INTRINSIC_DATA(avx512_mask_load_pd_128, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_load_pd_256, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_load_pd_512, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_load_ps_128, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_load_ps_256, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_load_ps_512, LOADA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_pd_128, LOADU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_pd_256, LOADU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_pd_512, LOADU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_ps_128, LOADU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_ps_256, LOADU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_loadu_ps_512, LOADU, ISD::DELETED_NODE, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), 
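The LOADA/LOADU entries added above route the AVX-512 masked load intrinsics into the generic lowering shown earlier (a plain load when the mask is all ones, an ISD masked load otherwise). From the C/C++ side these correspond to intrinsics such as _mm512_mask_loadu_ps; a small usage sketch, assuming an AVX-512F target (e.g. compile with -mavx512f):

    #include <cstdio>
    #include <immintrin.h>

    int main() {
      alignas(64) float Src[16], Dst[16];
      for (int I = 0; I < 16; ++I) Src[I] = (float)I;
      __mmask16 K = 0x00ff;                          // low 8 lanes active
      __m512 Pass = _mm512_setzero_ps();             // passthru for masked-off lanes
      __m512 V = _mm512_mask_loadu_ps(Pass, K, Src); // unaligned masked load
      _mm512_store_ps(Dst, V);
      std::printf("%g %g\n", Dst[7], Dst[8]);        // 7 (loaded) and 0 (passthru)
      return 0;
    }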
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8, @@ -1129,6 +1141,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VTRUNCS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCUS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK, @@ -1165,6 +1213,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VTRUNCUS, 0), X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK, X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + 
X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_128, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_256, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_512, INTR_TYPE_1OP_MASK, + X86ISD::VZEXT, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK, @@ -1201,12 +1285,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_d_128, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_d_256, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_d_512, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_q_128, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_q_256, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prol_q_512, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_d_128, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_d_256, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_d_512, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_prolv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTL, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_d_128, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_d_256, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_d_512, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_q_128, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_q_256, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_pror_q_512, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_d_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_d_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_d_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), + X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0), X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK, X86ISD::PSHUFB, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_d_128, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFD, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_d_256, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFD, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_d_512, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFD, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufh_w_128, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFHW, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufh_w_256, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFHW, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufh_w_512, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFHW, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufl_w_128, 
INTR_TYPE_2OP_MASK, + X86ISD::PSHUFLW, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufl_w_256, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFLW, 0), + X86_INTRINSIC_DATA(avx512_mask_pshufl_w_512, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFLW, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), @@ -1219,8 +1345,21 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psll_qi_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_qi_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_qi_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_wi_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_wi_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv16_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv2_di, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv32hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv4_di, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv4_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv8_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psllv8_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_mask_psllv_d, INTR_TYPE_2OP_MASK, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_mask_psllv_q, INTR_TYPE_2OP_MASK, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), @@ -1243,8 +1382,15 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psra_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav4_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0), X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, ISD::SRA, 0), X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav_q_128, INTR_TYPE_2OP_MASK, ISD::SRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psrav_q_256, INTR_TYPE_2OP_MASK, ISD::SRA, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h 
b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 3a7a98d..00515dd 100644 --- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -92,6 +92,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// used to address arguments in a function using a base pointer. int SEHFramePtrSaveIndex = 0; + /// True if this function has a subset of CSRs that is handled explicitly via + /// copies. + bool IsSplitCSR = false; + private: /// ForwardedMustTailRegParms - A list of virtual and physical registers /// that must be forwarded to every musttail call. @@ -160,6 +164,9 @@ public: SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() { return ForwardedMustTailRegParms; } + + bool isSplitCSR() const { return IsSplitCSR; } + void setIsSplitCSR(bool s) { IsSplitCSR = s; } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp index 58020d9..45cc0ae 100644 --- a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp @@ -9,8 +9,10 @@ // // This file defines the pass that performs some optimizations with LEA // instructions in order to improve code size. -// Currently, it does one thing: -// 1) Address calculations in load and store instructions are replaced by +// Currently, it does two things: +// 1) If there are two LEA instructions calculating addresses which only differ +// by displacement inside a basic block, one of them is removed. +// 2) Address calculations in load and store instructions are replaced by // existing LEA def registers where possible. // //===----------------------------------------------------------------------===// @@ -38,6 +40,7 @@ static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden, cl::init(false)); STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions"); +STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed"); namespace { class OptimizeLEAPass : public MachineFunctionPass { @@ -71,6 +74,13 @@ private: /// \brief Returns true if the instruction is LEA. bool isLEA(const MachineInstr &MI); + /// \brief Returns true if the \p Last LEA instruction can be replaced by the + /// \p First. The difference between displacements of the addresses calculated + /// by these LEAs is returned in \p AddrDispShift. It'll be used for proper + /// replacement of the \p Last LEA's uses with the \p First's def register. + bool isReplaceable(const MachineInstr &First, const MachineInstr &Last, + int64_t &AddrDispShift); + /// \brief Returns true if two instructions have memory operands that only /// differ by displacement. The numbers of the first memory operands for both /// instructions are specified through \p N1 and \p N2. The address @@ -79,13 +89,20 @@ private: const MachineInstr &MI2, unsigned N2, int64_t &AddrDispShift); - /// \brief Find all LEA instructions in the basic block. + /// \brief Find all LEA instructions in the basic block. Also, assign position + /// numbers to all instructions in the basic block to speed up calculation of + /// distance between them. void findLEAs(const MachineBasicBlock &MBB, SmallVectorImpl<MachineInstr *> &List); /// \brief Removes redundant address calculations. bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List); + /// \brief Removes LEAs which calculate similar addresses. 
+ bool removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List); + + DenseMap<const MachineInstr *, unsigned> InstrPos; + MachineRegisterInfo *MRI; const X86InstrInfo *TII; const X86RegisterInfo *TRI; @@ -99,14 +116,15 @@ FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); } int OptimizeLEAPass::calcInstrDist(const MachineInstr &First, const MachineInstr &Last) { - const MachineBasicBlock *MBB = First.getParent(); - - // Both instructions must be in the same basic block. - assert(Last.getParent() == MBB && + // Both instructions must be in the same basic block and they must be + // present in InstrPos. + assert(Last.getParent() == First.getParent() && "Instructions are in different basic blocks"); + assert(InstrPos.find(&First) != InstrPos.end() && + InstrPos.find(&Last) != InstrPos.end() && + "Instructions' positions are undefined"); - return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) - - std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First)); + return InstrPos[&Last] - InstrPos[&First]; } // Find the best LEA instruction in the List to replace address recalculation in @@ -189,6 +207,69 @@ bool OptimizeLEAPass::isLEA(const MachineInstr &MI) { Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; } +// Check that the Last LEA can be replaced by the First LEA. For this to hold, +// these requirements must be met: +// 1) Addresses calculated by LEAs differ only by displacement. +// 2) Def registers of LEAs belong to the same class. +// 3) All uses of the Last LEA def register are replaceable, thus the +// register is used only as address base. +bool OptimizeLEAPass::isReplaceable(const MachineInstr &First, + const MachineInstr &Last, + int64_t &AddrDispShift) { + assert(isLEA(First) && isLEA(Last) && + "The function works only with LEA instructions"); + + // Compare instructions' memory operands. + if (!isSimilarMemOp(Last, 1, First, 1, AddrDispShift)) + return false; + + // Make sure that LEA def registers belong to the same class. There may be + // instructions (like MOV8mr_NOREX) which allow a limited set of registers to + // be used as their operands, so we must be sure that replacing one LEA + // with another won't lead to putting a wrong register in the instruction. + if (MRI->getRegClass(First.getOperand(0).getReg()) != + MRI->getRegClass(Last.getOperand(0).getReg())) + return false; + + // Loop over all uses of the Last LEA to check that its def register is + // used only as address base for memory accesses. If so, it can be + // replaced; otherwise it cannot. + for (auto &MO : MRI->use_operands(Last.getOperand(0).getReg())) { + MachineInstr &MI = *MO.getParent(); + + // Get the number of the first memory operand. + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()); + + // If the use instruction has no memory operand, the LEA is not + // replaceable. + if (MemOpNo < 0) + return false; + + MemOpNo += X86II::getOperandBias(Desc); + + // If the address base of the use instruction is not the LEA def register, + // the LEA is not replaceable. + if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO)) + return false; + + // If the LEA def register is used as any other operand of the use + // instruction, the LEA is not replaceable. + for (unsigned i = 0; i < MI.getNumOperands(); i++) + if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) && + isIdenticalOp(MI.getOperand(i), MO)) + return false; + + // Check that the new address displacement will fit in 4 bytes.
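The check that follows is the subtle precondition: x86 memory operands encode the displacement as a signed 32-bit immediate, so each use's displacement must still be encodable after AddrDispShift is folded in. A minimal sketch of that test using llvm::isInt from Support/MathExtras.h, the same helper the code below relies on (the values in the trailing comment are illustrative):

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // True if a use's displacement remains encodable as a signed 32-bit
    // immediate after applying the shift between the two LEAs.
    static bool dispStillFits(int64_t Disp, int64_t AddrDispShift) {
      return llvm::isInt<32>(Disp + AddrDispShift);
    }

    // dispStillFits(0x7fffffff, 0) -> true
    // dispStillFits(0x7fffffff, 1) -> false (no longer fits in 4 bytes)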
+ if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() && + !isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() + + AddrDispShift)) + return false; + } + + return true; +} + // Check if MI1 and MI2 have memory operands which represent addresses that // differ only by displacement. bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1, @@ -219,7 +300,15 @@ bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1, void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, SmallVectorImpl<MachineInstr *> &List) { + unsigned Pos = 0; for (auto &MI : MBB) { + // Assign the position number to the instruction. Note that we are going to + // move some instructions during the optimization; however, there will never + // be a need to move two instructions before any selected instruction. So, to + // avoid multiple position updates during moves, we just increase the position + // counter by two, leaving free space for instructions which will be moved. + InstrPos[&MI] = Pos += 2; + if (isLEA(MI)) List.push_back(const_cast<MachineInstr *>(&MI)); } @@ -270,6 +359,13 @@ bool OptimizeLEAPass::removeRedundantAddrCalc( if (Dist < 0) { DefMI->removeFromParent(); MBB->insert(MachineBasicBlock::iterator(&MI), DefMI); + InstrPos[DefMI] = InstrPos[&MI] - 1; + + // Make sure the instructions' position numbers are sane. + assert(((InstrPos[DefMI] == 1 && DefMI == MBB->begin()) || + InstrPos[DefMI] > + InstrPos[std::prev(MachineBasicBlock::iterator(DefMI))]) && + "Instruction positioning is broken"); } // Since we can possibly extend register lifetime, clear kill flags. @@ -296,6 +392,81 @@ bool OptimizeLEAPass::removeRedundantAddrCalc( return Changed; } +// Try to find similar LEAs in the list and replace one with another. +bool +OptimizeLEAPass::removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List) { + bool Changed = false; + + // Loop over all LEA pairs. + auto I1 = List.begin(); + while (I1 != List.end()) { + MachineInstr &First = **I1; + auto I2 = std::next(I1); + while (I2 != List.end()) { + MachineInstr &Last = **I2; + int64_t AddrDispShift; + + // LEAs should be in occurrence order in the list, so we can freely + // replace later LEAs with earlier ones. + assert(calcInstrDist(First, Last) > 0 && + "LEAs must be in occurrence order in the list"); + + // Check that the Last LEA instruction can be replaced by the First. + if (!isReplaceable(First, Last, AddrDispShift)) { + ++I2; + continue; + } + + // Loop over all uses of the Last LEA and update their operands. Note that + // the correctness of this has already been checked in the isReplaceable + // function. + for (auto UI = MRI->use_begin(Last.getOperand(0).getReg()), + UE = MRI->use_end(); + UI != UE;) { + MachineOperand &MO = *UI++; + MachineInstr &MI = *MO.getParent(); + + // Get the number of the first memory operand. + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) + + X86II::getOperandBias(Desc); + + // Update address base. + MO.setReg(First.getOperand(0).getReg()); + + // Update address disp. + MachineOperand *Op = &MI.getOperand(MemOpNo + X86::AddrDisp); + if (Op->isImm()) + Op->setImm(Op->getImm() + AddrDispShift); + else if (Op->isGlobal()) + Op->setOffset(Op->getOffset() + AddrDispShift); + else + llvm_unreachable("Invalid address displacement operand"); + } + + // Since we can possibly extend register lifetime, clear kill flags.
+ MRI->clearKillFlags(First.getOperand(0).getReg()); + + ++NumRedundantLEAs; + DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump();); + + // By this moment, all of the Last LEA's uses must be replaced. So we can + // freely remove it. + assert(MRI->use_empty(Last.getOperand(0).getReg()) && + "The LEA's def register must have no uses"); + Last.eraseFromParent(); + + // Erase removed LEA from the list. + I2 = List.erase(I2); + + Changed = true; + } + ++I1; + } + + return Changed; +} + bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; @@ -310,6 +481,7 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { // Process all basic blocks. for (auto &MBB : MF) { SmallVector<MachineInstr *, 16> LEAs; + InstrPos.clear(); // Find all LEA instructions in basic block. findLEAs(MBB, LEAs); @@ -318,6 +490,11 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { if (LEAs.empty()) continue; + // Remove redundant LEA instructions. The optimization may have a negative + // effect on performance, so do it only for -Oz. + if (MF.getFunction()->optForMinSize()) + Changed |= removeRedundantLEAs(LEAs); + // Remove redundant address calculations. Changed |= removeRedundantAddrCalc(LEAs); } diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp index 5840443..274b566 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -250,7 +250,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_64_RT_AllRegs_SaveList; case CallingConv::CXX_FAST_TLS: if (Is64Bit) - return CSR_64_TLS_Darwin_SaveList; + return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ? + CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList; break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) @@ -305,6 +306,15 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_32_SaveList; } +const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy( + const MachineFunction *MF) const { + assert(MF && "Invalid MachineFunction pointer."); + if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR()) + return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList; + return nullptr; +} + const uint32_t * X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h index f014c8f..8d0094c 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h @@ -99,6 +99,8 @@ public: /// callee-save registers on this target. const MCPhysReg * getCalleeSavedRegs(const MachineFunction* MF) const override; + const MCPhysReg * + getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp index 816291d..6df0447 100644 --- a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -22,7 +22,7 @@ static cl::list<std::string> ForceAttributes("force-attribute", cl::Hidden, cl::desc("Add an attribute to a function. 
This should be a " "pair of 'function-name:attribute-name', for " - "example -force-add-attribute=foo:noinline. This " + "example -force-attribute=foo:noinline. This " "option can be specified multiple times.")); static Attribute::AttrKind parseAttrKind(StringRef Kind) { diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 6dcfb3f..527fdd1 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -6,16 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements a simple interprocedural pass which walks the -// call-graph, looking for functions which do not access or only read -// non-local memory, and marking them readnone/readonly. It does the -// same with function arguments independently, marking them readonly/ -// readnone/nocapture. Finally, well-known library call declarations -// are marked with all attributes that are consistent with the -// function's standard definition. This pass is implemented as a -// bottom-up traversal of the call-graph. -// +/// +/// \file +/// This file implements interprocedural passes which walk the +/// call-graph deducing and/or propagating function attributes. +/// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO.h" @@ -57,19 +52,14 @@ typedef SmallSetVector<Function *, 8> SCCNodeSet; } namespace { -struct FunctionAttrs : public CallGraphSCCPass { +struct PostOrderFunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID) { - initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); + PostOrderFunctionAttrs() : CallGraphSCCPass(ID) { + initializePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); } bool runOnSCC(CallGraphSCC &SCC) override; - bool doInitialization(CallGraph &CG) override { - Revisit.clear(); - return false; - } - bool doFinalization(CallGraph &CG) override; - + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AssumptionCacheTracker>(); @@ -79,20 +69,19 @@ struct FunctionAttrs : public CallGraphSCCPass { private: TargetLibraryInfo *TLI; - SmallVector<WeakVH,16> Revisit; }; } -char FunctionAttrs::ID = 0; -INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", +char PostOrderFunctionAttrs::ID = 0; +INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrs, "functionattrs", "Deduce function attributes", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", +INITIALIZE_PASS_END(PostOrderFunctionAttrs, "functionattrs", "Deduce function attributes", false, false) -Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } +Pass *llvm::createPostOrderFunctionAttrsPass() { return new PostOrderFunctionAttrs(); } namespace { /// The three kinds of memory access relevant to 'readonly' and @@ -949,8 +938,7 @@ static bool setDoesNotRecurse(Function &F) { return true; } -static bool addNoRecurseAttrs(const CallGraphSCC &SCC, - SmallVectorImpl<WeakVH> &Revisit) { +static bool addNoRecurseAttrs(const CallGraphSCC &SCC) { // Try and identify functions that do not recurse. // If the SCC contains multiple nodes we know for sure there is recursion. 
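Before the next hunk, it helps to restate the post-order rule the code above implements: an SCC with more than one function is recursive by construction, and a single-function SCC can be marked norecurse only if everything it calls is already known norecurse. A minimal sketch of that rule as a standalone predicate, not the pass itself (the flat function list is an assumption for illustration):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstIterator.h"

    // Post-order norecurse deduction reduced to a predicate over one SCC.
    static bool sccCanBeNoRecurse(llvm::ArrayRef<llvm::Function *> SCCFns) {
      if (SCCFns.size() != 1)
        return false; // A multi-node SCC is mutually recursive by definition.
      llvm::Function *F = SCCFns.front();
      for (llvm::Instruction &I : llvm::instructions(*F))
        if (auto CS = llvm::CallSite(&I)) {
          llvm::Function *Callee = CS.getCalledFunction();
          // Indirect calls, self-calls, and callees not yet proven norecurse
          // all block the deduction.
          if (!Callee || Callee == F || !Callee->doesNotRecurse())
            return false;
        }
      return true;
    }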
@@ -973,32 +961,11 @@ static bool addNoRecurseAttrs(const CallGraphSCC &SCC, // Function calls a potentially recursive function. return setDoesNotRecurse(*F); - // We know that F is not obviously recursive, but we haven't been able to - // prove that it doesn't actually recurse. Add it to the Revisit list to try - // again top-down later. - Revisit.push_back(F); + // Nothing else we can deduce usefully during the postorder traversal. return false; } -static bool addNoRecurseAttrsTopDownOnly(Function *F) { - // If F is internal and all uses are in norecurse functions, then F is also - // norecurse. - if (F->doesNotRecurse()) - return false; - if (F->hasInternalLinkage()) { - for (auto *U : F->users()) - if (auto *I = dyn_cast<Instruction>(U)) { - if (!I->getParent()->getParent()->doesNotRecurse()) - return false; - } else { - return false; - } - return setDoesNotRecurse(*F); - } - return false; -} - -bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { +bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) { TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); bool Changed = false; @@ -1040,19 +1007,100 @@ bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { Changed |= addNoAliasAttrs(SCCNodes); Changed |= addNonNullAttrs(SCCNodes, *TLI); } - - Changed |= addNoRecurseAttrs(SCC, Revisit); + + Changed |= addNoRecurseAttrs(SCC); return Changed; } -bool FunctionAttrs::doFinalization(CallGraph &CG) { +namespace { +/// A pass to do RPO deduction and propagation of function attributes. +/// +/// This pass provides a general RPO or "top down" propagation of +/// function attributes. For a few (rare) cases, we can deduce significantly +/// more about function attributes by working in RPO, so this pass +/// provides the complement to the post-order pass above where the majority of +/// deduction is performed. +// FIXME: Currently there is no RPO CGSCC pass structure to slide into and so +// this is a boring module pass, but eventually it should be an RPO CGSCC pass +// when such infrastructure is available. +struct ReversePostOrderFunctionAttrs : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ReversePostOrderFunctionAttrs() : ModulePass(ID) { + initializeReversePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<CallGraphWrapperPass>(); + } +}; +} + +char ReversePostOrderFunctionAttrs::ID = 0; +INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrs, "rpo-functionattrs", + "Deduce function attributes in RPO", false, false) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(ReversePostOrderFunctionAttrs, "rpo-functionattrs", + "Deduce function attributes in RPO", false, false) + +Pass *llvm::createReversePostOrderFunctionAttrsPass() { + return new ReversePostOrderFunctionAttrs(); +} + +static bool addNoRecurseAttrsTopDown(Function &F) { + // We check the preconditions for the function prior to calling this to avoid + // the cost of building up a reversible post-order list. We assert them here + // to make sure none of the invariants this relies on were violated.
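The asserts that follow pin down those preconditions; in source terms the top-down rule looks like this hypothetical pair (both functions and the scenario are invented for illustration):

    // 'helper' has internal linkage and its only use is a direct call from
    // 'driver'. Once 'driver' is known to be norecurse, no call path can
    // re-enter 'helper', so the RPO walk may mark 'helper' norecurse too.
    namespace {
    int helper(int X) { return X + 1; }      // internal linkage
    }
    int driver(int X) { return helper(X); }  // assume already norecurse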
+ assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); + assert(!F.doesNotRecurse() && + "This function has already been deduced as norecurse!"); + assert(F.hasInternalLinkage() && + "Can only do top-down deduction for internal linkage functions!"); + + // If F is internal and all of its uses are calls from non-recursive + // functions, then none of its calls could in fact recurse without going + // through a function marked norecurse, and so we can mark this function too + // as norecurse. Note that the uses must actually be calls -- otherwise + // a pointer to this function could be returned from a norecurse function but + // this function could be recursively (indirectly) called. Note that this + // also detects if F is directly recursive as F is not yet marked as + // a norecurse function. + for (auto *U : F.users()) { + auto *I = dyn_cast<Instruction>(U); + if (!I) + return false; + CallSite CS(I); + if (!CS || !CS.getParent()->getParent()->doesNotRecurse()) + return false; + } + return setDoesNotRecurse(F); +} + +bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) { + // We only have a post-order SCC traversal (because SCCs are inherently + // discovered in post-order), so we accumulate them in a vector and then walk + // it in reverse. This is simpler than using the RPO iterator infrastructure + // because we need to combine SCC detection and the PO walk of the call + // graph. We can also cheat egregiously because we're primarily interested in + // synthesizing norecurse and so we can only save the singular SCCs, as SCCs + // with multiple functions in them will clearly be recursive. + auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + SmallVector<Function *, 16> Worklist; + for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { + if (I->size() != 1) + continue; + + Function *F = I->front()->getFunction(); + if (F && !F->isDeclaration() && !F->doesNotRecurse() && + F->hasInternalLinkage()) + Worklist.push_back(F); + } + bool Changed = false; - // When iterating over SCCs we visit functions in a bottom-up fashion. Some of - // the rules we have for identifying norecurse functions work best with a - // top-down walk, so look again at all the functions we previously marked as - // worth revisiting, in top-down order. - for (auto &F : reverse(Revisit)) - if (F) - Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F)); + for (auto *F : reverse(Worklist)) + Changed |= addNoRecurseAttrsTopDown(*F); + return Changed; } diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp index d8b677b..5e0df95 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -41,15 +41,16 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName, LLVMContext &Context) { SMDiagnostic Err; DEBUG(dbgs() << "Loading '" << FileName << "'\n"); - std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context); + // Metadata isn't loaded or linked until after all functions are + // imported, after which it will be materialized and linked.
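A minimal standalone sketch of the lazy-loading sequence this enables (Path is a placeholder, error handling elided as in the pass): parse with metadata deferred, import the needed bodies, then materialize and upgrade the metadata once per source module:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/AutoUpgrade.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IRReader/IRReader.h"
    #include "llvm/Support/SourceMgr.h"
    #include <memory>

    std::unique_ptr<llvm::Module> loadForImport(llvm::StringRef Path,
                                                llvm::LLVMContext &Ctx) {
      llvm::SMDiagnostic Err;
      // Defer metadata: function bodies can be imported without it.
      auto M = llvm::getLazyIRFileModule(Path, Err, Ctx,
                                         /* ShouldLazyLoadMetadata = */ true);
      if (!M)
        return nullptr;
      // ... import the required function bodies here ...
      M->materializeMetadata();    // load the deferred metadata
      llvm::UpgradeDebugInfo(*M);  // upgrade it before linking
      return M;
    }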
+ std::unique_ptr<Module> Result = + getLazyIRFileModule(FileName, Err, Context, + /* ShouldLazyLoadMetadata = */ true); if (!Result) { Err.print("function-import", errs()); return nullptr; } - Result->materializeMetadata(); - UpgradeDebugInfo(*Result); - return Result; } @@ -132,6 +133,8 @@ static void findExternalCalls(const Module &DestModule, Function &F, // Ignore functions already present in the destination module auto *SrcGV = DestModule.getNamedValue(ImportedName); if (SrcGV) { + if (GlobalAlias *SGA = dyn_cast<GlobalAlias>(SrcGV)) + SrcGV = SGA->getBaseObject(); assert(isa<Function>(SrcGV) && "Name collision during import"); if (!cast<Function>(SrcGV)->isDeclaration()) { DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring " @@ -324,6 +327,10 @@ bool FunctionImporter::importFunctions(Module &DestModule) { ModuleToTempMDValsMap) { // Load the specified source module. auto &SrcModule = ModuleLoaderCache(SME.getKey()); + // The modules were created with lazy metadata loading. Materialize it + // now, before linking it. + SrcModule.materializeMetadata(); + UpgradeDebugInfo(SrcModule); // Link in all necessary metadata from this module. if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get())) @@ -408,14 +415,19 @@ public: Index = IndexPtr.get(); } + // First we need to promote to global scope and rename any local values that + // are potentially exported to other modules. + if (renameModuleForThinLTO(M, Index)) { + errs() << "Error renaming module\n"; + return false; + } + // Perform the import now. auto ModuleLoader = [&M](StringRef Identifier) { return loadFile(Identifier, M.getContext()); }; FunctionImporter Importer(*Index, ModuleLoader); return Importer.importFunctions(M); - - return false; } }; } // anonymous namespace diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp index 7ea6c08..89629cf0 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp @@ -28,7 +28,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeDAEPass(Registry); initializeDAHPass(Registry); initializeForceFunctionAttrsLegacyPassPass(Registry); - initializeFunctionAttrsPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); initializeIPCPPass(Registry); @@ -42,6 +41,8 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeLowerBitSetsPass(Registry); initializeMergeFunctionsPass(Registry); initializePartialInlinerPass(Registry); + initializePostOrderFunctionAttrsPass(Registry); + initializeReversePostOrderFunctionAttrsPass(Registry); initializePruneEHPass(Registry); initializeStripDeadPrototypesLegacyPassPass(Registry); initializeStripSymbolsPass(Registry); @@ -71,7 +72,7 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) { } void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createFunctionAttrsPass()); + unwrap(PM)->add(createPostOrderFunctionAttrsPass()); } void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp index 8e4ad64..3c6a7bb 100644 --- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -38,7 +38,7 @@ namespace { static char ID; // Pass identification, replacement for typeid unsigned NumLoops; - explicit LoopExtractor(unsigned numLoops = ~0) + explicit LoopExtractor(unsigned numLoops = ~0) : LoopPass(ID), NumLoops(numLoops) { 
initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); } @@ -143,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. - LI.updateUnloop(L); + LI.markAsRemoved(L); } ++NumExtracted; } diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 9876efa..faada9c 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -251,7 +251,7 @@ void PassManagerBuilder::populateModulePassManager( Inliner = nullptr; } if (!DisableUnitAtATime) - MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs + MPM.add(createPostOrderFunctionAttrsPass()); if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args @@ -346,6 +346,9 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (!DisableUnitAtATime) + MPM.add(createReversePostOrderFunctionAttrsPass()); + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve @@ -502,7 +505,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createIPSCCPPass()); // Now that we internalized some globals, see if we can hack on them! - PM.add(createFunctionAttrsPass()); // Add norecurse if possible. + PM.add(createPostOrderFunctionAttrsPass()); + PM.add(createReversePostOrderFunctionAttrsPass()); PM.add(createGlobalOptimizerPass()); // Promote any localized global vars. PM.add(createPromoteMemoryToRegisterPass()); @@ -551,7 +555,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createScalarReplAggregatesPass()); // Run a few AA driven optimizations here and now, to cleanup the code. - PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createPostOrderFunctionAttrsPass()); // Add nocapture. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. 
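The PassManagerBuilder changes above encode a two-phase design: deduce attributes bottom-up over the call graph first, then propagate top-down once the CGSCC walk is complete. A minimal sketch of the relative ordering with the legacy pass manager (a bare pipeline, not the builder itself):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO.h"

    void addFunctionAttrPhases(llvm::legacy::PassManager &PM) {
      // Phase 1: post-order over SCCs; callees are visited before callers,
      // so readnone/readonly/nocapture facts flow upward.
      PM.add(llvm::createPostOrderFunctionAttrsPass());
      // Phase 2: reverse post-order; caller facts (norecurse, currently)
      // flow downward to internal functions.
      PM.add(llvm::createReversePostOrderFunctionAttrsPass());
    }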
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 7ad0efc..160792b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -636,7 +636,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { // if pattern detected emit alternate sequence if (OpX && OpY) { BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->SetFastMathFlags(Log2->getFastMathFlags()); + Builder->setFastMathFlags(Log2->getFastMathFlags()); Log2->setArgOperand(0, OpY); Value *FMulVal = Builder->CreateFMul(OpX, Log2); Value *FSub = Builder->CreateFSub(FMulVal, OpX); @@ -652,7 +652,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool IgnoreZeroSign = I.hasNoSignedZeros(); if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->SetFastMathFlags(I.getFastMathFlags()); + Builder->setFastMathFlags(I.getFastMathFlags()); Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); @@ -693,7 +693,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Y) { BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->SetFastMathFlags(I.getFastMathFlags()); + Builder->setFastMathFlags(I.getFastMathFlags()); Value *T = Builder->CreateFMul(Opnd1, Opnd1); Value *R = Builder->CreateFMul(T, Y); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 776704d..51219bc 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -930,7 +930,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->SetFastMathFlags(FCI->getFastMathFlags()); + Builder->setFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal, FCI->getName() + ".inv"); @@ -973,7 +973,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->SetFastMathFlags(FCI->getFastMathFlags()); + Builder->setFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal, FCI->getName() + ".inv"); @@ -1082,7 +1082,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } else { IRBuilder<>::FastMathFlagGuard FMFG(*Builder); auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags(); - Builder->SetFastMathFlags(FMF); + Builder->setFastMathFlags(FMF); Cmp = Builder->CreateFCmp(Pred, LHS, RHS); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 54a9fbd..5cde31a 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -384,23 +384,20 @@ static void replaceExtractElements(InsertElementInst *InsElt, ConstantVector::get(ExtendMask)); // Insert the new shuffle after the vector operand of the extract is defined - // or at the 
start of the basic block, so any subsequent extracts can use it. - bool ReplaceAllExtUsers; - if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) { + // (as long as it's not a PHI) or at the start of the basic block of the + // extract, so any subsequent extracts in the same basic block can use it. + // TODO: Insert before the earliest ExtractElementInst that is replaced. + auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp); + if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) WideVec->insertAfter(ExtVecOpInst); - ReplaceAllExtUsers = true; - } else { - // TODO: Insert at start of function, so it's always safe to replace all? + else IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); - ReplaceAllExtUsers = false; - } // Replace extracts from the original narrow vector with extracts from the new // wide vector. for (User *U : ExtVecOp->users()) { ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); - if (!OldExt || - (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent())) + if (!OldExt || OldExt->getParent() != WideVec->getParent()) continue; auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); NewExt->insertAfter(WideVec); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 51ff95d..28483e7 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -93,8 +93,8 @@ private: /// Replace instrprof_increment with an increment of the appropriate value. void lowerIncrement(InstrProfIncrementInst *Inc); - /// Set up the section and uses for coverage data and its references. - void lowerCoverageData(GlobalVariable *CoverageData); + /// Force emitting of name vars for unused functions. + void lowerCoverageData(GlobalVariable *CoverageNamesVar); /// Get the region counters for an increment, creating them if necessary. /// @@ -156,9 +156,9 @@ bool InstrProfiling::runOnModule(Module &M) { } } - if (GlobalVariable *Coverage = - M.getNamedGlobal(getCoverageMappingVarName())) { - lowerCoverageData(Coverage); + if (GlobalVariable *CoverageNamesVar = + M.getNamedGlobal(getCoverageNamesVarName())) { + lowerCoverageData(CoverageNamesVar); MadeChange = true; } @@ -233,28 +233,16 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { Inc->eraseFromParent(); } -void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { - - Constant *Init = CoverageData->getInitializer(); - // We're expecting { [4 x 32], [n x { i8*, i32, i32 }], [m x i8] } - // for some C. If not, the frontend's given us something broken. 
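The insertion-point rule in the replaceExtractElements change above is worth spelling out: a non-PHI instruction may not be placed among a block's leading PHI nodes, so when the vector is defined by a PHI the new shuffle must drop to the block's first insertion point instead. A hedged sketch of that choice (the helper name is invented):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    // Place NewI as early as legally possible relative to the definition of V.
    static void placeAfterDef(llvm::Instruction *NewI, llvm::Value *V,
                              llvm::BasicBlock *FallbackBB) {
      auto *DefI = llvm::dyn_cast<llvm::Instruction>(V);
      if (DefI && !llvm::isa<llvm::PHINode>(DefI))
        NewI->insertAfter(DefI); // safe: DefI is already past the PHI prefix
      else
        // Arguments, constants, and PHI-defined values: fall back to the
        // first insertion point, which skips PHIs (and any landingpad).
        NewI->insertBefore(&*FallbackBB->getFirstInsertionPt());
    }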
- assert(Init->getNumOperands() == 3 && "bad number of fields in coverage map"); - assert(isa<ConstantArray>(Init->getAggregateElement(1)) && - "invalid function list in coverage map"); - ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(1)); - for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) { - Constant *Record = Records->getOperand(I); - Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts(); +void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) { + ConstantArray *Names = + cast<ConstantArray>(CoverageNamesVar->getInitializer()); + for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) { + Constant *NC = Names->getOperand(I); + Value *V = NC->stripPointerCasts(); assert(isa<GlobalVariable>(V) && "Missing reference to function name"); GlobalVariable *Name = cast<GlobalVariable>(V); - // If we have region counters for this name, we've already handled it. - auto It = ProfileDataMap.find(Name); - if (It != ProfileDataMap.end()) - if (It->second.RegionCounters) - continue; - // Move the name variable to the right section. Name->setSection(getNameSection()); Name->setAlignment(1); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 5a7bce5..34aaa7f 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -692,7 +692,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { const DataLayout &DL = F.getParent()->getDataLayout(); unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment); unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); - if (isa<StructType>(Shadow->getType())) { + if (Shadow->getType()->isAggregateType()) { paintOrigin(IRB, updateOrigin(Origin, IRB), getOriginPtr(Addr, IRB, Alignment), StoreSize, OriginAlignment); diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 087ce8a..dcdcfed 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -100,9 +100,9 @@ namespace { std::unique_ptr<BranchProbabilityInfo> BPI; bool HasProfileData; #ifdef NDEBUG - SmallPtrSet<BasicBlock*, 16> LoopHeaders; + SmallPtrSet<const BasicBlock *, 16> LoopHeaders; #else - SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders; + SmallSet<AssertingVH<const BasicBlock>, 16> LoopHeaders; #endif DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet; @@ -163,6 +163,7 @@ namespace { bool SimplifyPartiallyRedundantLoad(LoadInst *LI); bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB); + bool TryToUnfoldSelectInCurrBB(BasicBlock *BB); private: BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, @@ -210,11 +211,12 @@ bool JumpThreading::runOnFunction(Function &F) { // we will loop forever. We take care of this issue by not jump threading for // back edges. This works for normal cases but not for unreachable blocks as // they may have a cycle with no back edge.
- removeUnreachableBlocks(F); + bool EverChanged = false; + EverChanged |= removeUnreachableBlocks(F, LVI); FindLoopHeaders(F); - bool Changed, EverChanged = false; + bool Changed; do { Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E;) { @@ -363,8 +365,8 @@ void JumpThreading::FindLoopHeaders(Function &F) { SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; FindFunctionBackedges(F, Edges); - for (unsigned i = 0, e = Edges.size(); i != e; ++i) - LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); + for (const auto &Edge : Edges) + LoopHeaders.insert(Edge.second); } /// getKnownConstant - Helper method to determine if we can thread over a @@ -410,8 +412,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // If V is a constant, then it is known in all predecessors. if (Constant *KC = getKnownConstant(V, Preference)) { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Result.push_back(std::make_pair(KC, *PI)); + for (BasicBlock *Pred : predecessors(BB)) + Result.push_back(std::make_pair(KC, Pred)); return true; } @@ -434,8 +436,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. // Perhaps getConstantOnEdge should be smart enough to do this? - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI); @@ -491,22 +492,17 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // Scan for the sentinel. If we find an undef, force it to the // interesting value: x|undef -> true and x&undef -> false. - for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) - if (LHSVals[i].first == InterestingVal || - isa<UndefValue>(LHSVals[i].first)) { - Result.push_back(LHSVals[i]); - Result.back().first = InterestingVal; - LHSKnownBBs.insert(LHSVals[i].second); + for (const auto &LHSVal : LHSVals) + if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) { + Result.emplace_back(InterestingVal, LHSVal.second); + LHSKnownBBs.insert(LHSVal.second); } - for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) - if (RHSVals[i].first == InterestingVal || - isa<UndefValue>(RHSVals[i].first)) { + for (const auto &RHSVal : RHSVals) + if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) { // If we already inferred a value for this block on the LHS, don't // re-add it. - if (!LHSKnownBBs.count(RHSVals[i].second)) { - Result.push_back(RHSVals[i]); - Result.back().first = InterestingVal; - } + if (!LHSKnownBBs.count(RHSVal.second)) + Result.emplace_back(InterestingVal, RHSVal.second); } return !Result.empty(); @@ -522,8 +518,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, return false; // Invert the known values. - for (unsigned i = 0, e = Result.size(); i != e; ++i) - Result[i].first = ConstantExpr::getNot(Result[i].first); + for (auto &R : Result) + R.first = ConstantExpr::getNot(R.first); return true; } @@ -538,12 +534,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, WantInteger, CxtI); // Try to use constant folding to simplify the binary operator. 
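Much of the churn in this and the following hunks is a single idiom: replacing manual pred_iterator loops with the predecessors() range adaptor from llvm/IR/CFG.h (and successors() likewise). A minimal before/after sketch; visit() is a stand-in for the per-predecessor work:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/CFG.h"

    static void visit(llvm::BasicBlock *Pred); // placeholder

    static void walkPreds(llvm::BasicBlock *BB) {
      // Before: for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);
      //              PI != E; ++PI) visit(*PI);
      for (llvm::BasicBlock *Pred : llvm::predecessors(BB))
        visit(Pred);
    }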
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { - Constant *V = LHSVals[i].first; + for (const auto &LHSVal : LHSVals) { + Constant *V = LHSVal.first; Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); if (Constant *KC = getKnownConstant(Folded, WantInteger)) - Result.push_back(std::make_pair(KC, LHSVals[i].second)); + Result.push_back(std::make_pair(KC, LHSVal.second)); } } @@ -591,8 +587,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) { Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){ - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. LazyValueInfo::Tristate Res = @@ -615,12 +610,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, WantInteger, CxtI); - for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { - Constant *V = LHSVals[i].first; + for (const auto &LHSVal : LHSVals) { + Constant *V = LHSVal.first; Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), V, CmpConst); if (Constant *KC = getKnownConstant(Folded, WantInteger)) - Result.push_back(std::make_pair(KC, LHSVals[i].second)); + Result.push_back(std::make_pair(KC, LHSVal.second)); } return !Result.empty(); @@ -637,8 +632,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, if ((TrueVal || FalseVal) && ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds, WantInteger, CxtI)) { - for (unsigned i = 0, e = Conds.size(); i != e; ++i) { - Constant *Cond = Conds[i].first; + for (auto &C : Conds) { + Constant *Cond = C.first; // Figure out what value to use for the condition. bool KnownCond; @@ -655,7 +650,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // See if the select has a known constant value for this predecessor. if (Constant *Val = KnownCond ? TrueVal : FalseVal) - Result.push_back(std::make_pair(Val, Conds[i].second)); + Result.push_back(std::make_pair(Val, C.second)); } return !Result.empty(); @@ -665,8 +660,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // If all else fails, see if LVI can figure out a constant value for us. Constant *CI = LVI->getConstant(V, BB, CxtI); if (Constant *KC = getKnownConstant(CI, Preference)) { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Result.push_back(std::make_pair(KC, *PI)); + for (BasicBlock *Pred : predecessors(BB)) + Result.push_back(std::make_pair(KC, Pred)); } return !Result.empty(); @@ -736,6 +731,9 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } } + if (TryToUnfoldSelectInCurrBB(BB)) + return true; + // What kind of constant we're looking for. ConstantPreference Preference = WantInteger; @@ -988,10 +986,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If we got here, the loaded value is transparent through to the start of the // block. Check to see if it is available in any of the predecessor blocks. - for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); - PI != PE; ++PI) { - BasicBlock *PredBB = *PI; - + for (BasicBlock *PredBB : predecessors(LoadBB)) { // If we already scanned this predecessor, skip it. 
if (!PredsScanned.insert(PredBB).second) continue; @@ -1038,13 +1033,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { SmallVector<BasicBlock*, 8> PredsToSplit; SmallPtrSet<BasicBlock*, 8> AvailablePredSet; - for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i) - AvailablePredSet.insert(AvailablePreds[i].first); + for (const auto &AvailablePred : AvailablePreds) + AvailablePredSet.insert(AvailablePred.first); // Add all the unavailable predecessors to the PredsToSplit list. - for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); - PI != PE; ++PI) { - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(LoadBB)) { // If the predecessor is an indirect goto, we can't split the edge. if (isa<IndirectBrInst>(P->getTerminator())) return false; @@ -1129,9 +1122,9 @@ FindMostPopularDest(BasicBlock *BB, // blocks with known and real destinations to threading undef. We'll handle // them later if interesting. DenseMap<BasicBlock*, unsigned> DestPopularity; - for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) - if (PredToDestList[i].second) - DestPopularity[PredToDestList[i].second]++; + for (const auto &PredToDest : PredToDestList) + if (PredToDest.second) + DestPopularity[PredToDest.second]++; // Find the most popular dest. DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin(); @@ -1194,10 +1187,10 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, "ComputeValueKnownInPredecessors returned true with no values"); DEBUG(dbgs() << "IN BB: " << *BB; - for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { + for (const auto &PredValue : PredValues) { dbgs() << " BB '" << BB->getName() << "': FOUND condition = " - << *PredValues[i].first - << " for pred '" << PredValues[i].second->getName() << "'.\n"; + << *PredValue.first + << " for pred '" << PredValue.second->getName() << "'.\n"; }); // Decide what we want to thread through. Convert our list of known values to @@ -1210,8 +1203,8 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; - for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { - BasicBlock *Pred = PredValues[i].second; + for (const auto &PredValue : PredValues) { + BasicBlock *Pred = PredValue.second; if (!SeenPreds.insert(Pred).second) continue; // Duplicate predecessor entry. @@ -1220,7 +1213,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (isa<IndirectBrInst>(Pred->getTerminator())) continue; - Constant *Val = PredValues[i].first; + Constant *Val = PredValue.first; BasicBlock *DestBB; if (isa<UndefValue>(Val)) @@ -1260,16 +1253,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // Now that we know what the most popular destination is, factor all // predecessors that will jump to it into a single predecessor. SmallVector<BasicBlock*, 16> PredsToFactor; - for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) - if (PredToDestList[i].second == MostPopularDest) { - BasicBlock *Pred = PredToDestList[i].first; + for (const auto &PredToDest : PredToDestList) + if (PredToDest.second == MostPopularDest) { + BasicBlock *Pred = PredToDest.first; // This predecessor may be a switch or something else that has multiple // edges to the block. Factor each of these edges by listing them // according to # occurrences in PredsToFactor. 
- TerminatorInst *PredTI = Pred->getTerminator(); - for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i) - if (PredTI->getSuccessor(i) == BB) + for (BasicBlock *Succ : successors(Pred)) + if (Succ == BB) PredsToFactor.push_back(Pred); } @@ -1366,11 +1358,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // Scan the information to see which is most popular: true or false. The // predecessors can be of the set true, false, or undef. unsigned NumTrue = 0, NumFalse = 0; - for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { - if (isa<UndefValue>(XorOpValues[i].first)) + for (const auto &XorOpValue : XorOpValues) { + if (isa<UndefValue>(XorOpValue.first)) // Ignore undefs for the count. continue; - if (cast<ConstantInt>(XorOpValues[i].first)->isZero()) + if (cast<ConstantInt>(XorOpValue.first)->isZero()) ++NumFalse; else ++NumTrue; @@ -1386,12 +1378,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // Collect all of the blocks that this can be folded into so that we can // factor this once and clone it once. SmallVector<BasicBlock*, 8> BlocksToFoldInto; - for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { - if (XorOpValues[i].first != SplitVal && - !isa<UndefValue>(XorOpValues[i].first)) + for (const auto &XorOpValue : XorOpValues) { + if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first)) continue; - BlocksToFoldInto.push_back(XorOpValues[i].second); + BlocksToFoldInto.push_back(XorOpValue.second); } // If we inferred a value for all of the predecessors, then duplication won't @@ -1543,10 +1534,10 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // PHI insertion, of which we are prepared to do, clean these up now. SSAUpdater SSAUpdate; SmallVector<Use*, 16> UsesToRename; - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + for (Instruction &I : *BB) { // Scan all uses of this instruction to see if it is used outside of its // block, and if so, record them in UsesToRename. - for (Use &U : I->uses()) { + for (Use &U : I.uses()) { Instruction *User = cast<Instruction>(U.getUser()); if (PHINode *UserPN = dyn_cast<PHINode>(User)) { if (UserPN->getIncomingBlock(U) == BB) @@ -1561,14 +1552,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, if (UsesToRename.empty()) continue; - DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I->getType(), I->getName()); - SSAUpdate.AddAvailableValue(BB, &*I); - SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]); + SSAUpdate.Initialize(I.getType(), I.getName()); + SSAUpdate.AddAvailableValue(BB, &I); + SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]); while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); @@ -1644,10 +1635,10 @@ void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, // Collect updated outgoing edges' frequencies from BB and use them to update // edge probabilities. SmallVector<uint64_t, 4> BBSuccFreq; - for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - auto SuccFreq = (*I == SuccBB) + for (BasicBlock *Succ : successors(BB)) { + auto SuccFreq = (Succ == SuccBB) ? 
BB2SuccBBFreq - NewBBFreq + : BBOrigFreq * BPI->getEdgeProbability(BB, Succ); BBSuccFreq.push_back(SuccFreq.getFrequency()); } @@ -1783,10 +1774,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // PHI insertion, of which we are prepared to do, clean these up now. SSAUpdater SSAUpdate; SmallVector<Use*, 16> UsesToRename; - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + for (Instruction &I : *BB) { // Scan all uses of this instruction to see if it is used outside of its // block, and if so, record them in UsesToRename. - for (Use &U : I->uses()) { + for (Use &U : I.uses()) { Instruction *User = cast<Instruction>(U.getUser()); if (PHINode *UserPN = dyn_cast<PHINode>(User)) { if (UserPN->getIncomingBlock(U) == BB) @@ -1801,14 +1792,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, if (UsesToRename.empty()) continue; - DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I->getType(), I->getName()); - SSAUpdate.AddAvailableValue(BB, &*I); - SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]); + SSAUpdate.Initialize(I.getType(), I.getName()); + SSAUpdate.AddAvailableValue(BB, &I); + SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]); while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); @@ -1903,3 +1894,62 @@ bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { } return false; } + +/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form +/// bb: +/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ... +/// %s = select p, trueval, falseval +/// +/// And expand the select into a branch structure. This later enables +/// jump-threading over bb in this pass. +/// +/// Using an approach similar to SimplifyCFG::FoldCondBranchOnPHI(), unfold the +/// select if the associated PHI has at least one constant. If the unfolded +/// select is not jump-threaded, it will be folded again in later +/// optimizations. +bool JumpThreading::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { + // If threading this would thread across a loop header, don't thread the edge. + // See the comments above FindLoopHeaders for justifications and caveats. + if (LoopHeaders.count(BB)) + return false; + + // Look for a Phi/Select pair in the same basic block. The Phi feeds the + // condition of the Select and at least one of the incoming values is a + // constant. + for (BasicBlock::iterator BI = BB->begin(); + PHINode *PN = dyn_cast<PHINode>(BI); ++BI) { + unsigned NumPHIValues = PN->getNumIncomingValues(); + if (NumPHIValues == 0 || !PN->hasOneUse()) + continue; + + SelectInst *SI = dyn_cast<SelectInst>(PN->user_back()); + if (!SI || SI->getParent() != BB) + continue; + + Value *Cond = SI->getCondition(); + if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1)) + continue; + + bool HasConst = false; + for (unsigned i = 0; i != NumPHIValues; ++i) { + if (PN->getIncomingBlock(i) == BB) + return false; + if (isa<ConstantInt>(PN->getIncomingValue(i))) + HasConst = true; + } + + if (HasConst) { + // Expand the select.
+ TerminatorInst *Term = + SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); + NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); + NewPN->addIncoming(SI->getFalseValue(), BB); + SI->replaceAllUsesWith(NewPN); + SI->eraseFromParent(); + return true; + } + } + + return false; +} diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index e01e23f..8923ff7 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -203,9 +203,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { CurAST = new AliasSetTracker(*AA); // Collect Alias info from subloops. - for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); - LoopItr != LoopItrE; ++LoopItr) { - Loop *InnerL = *LoopItr; + for (Loop *InnerL : L->getSubLoops()) { AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; assert(InnerAST && "Where is my AST?"); @@ -227,9 +225,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Because subloops have already been incorporated into AST, we skip blocks in // subloops. // - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; + for (BasicBlock *BB : L->blocks()) { if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. CurAST->add(*BB); // Incorporate the specified basic block } @@ -263,9 +259,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { PredIteratorCache PIC; // Loop over all of the alias sets in the tracker object. - for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); - I != E; ++I) - Changed |= promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts, + for (AliasSet &AS : *CurAST) + Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT, CurLoop, CurAST, &SafetyInfo); @@ -324,9 +319,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // We are processing blocks in reverse dfo, so process children first. const std::vector<DomTreeNode*> &Children = N->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= - sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + for (DomTreeNode *Child : Children) + Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB,CurLoop,LI)) return Changed; @@ -407,9 +402,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, } const std::vector<DomTreeNode*> &Children = N->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= - hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + for (DomTreeNode *Child : Children) + Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); return Changed; } @@ -499,9 +493,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. 
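     // (Editor's illustration, hedged: a loop whose only memory access is a
     // read, e.g.
     //    %n = call i64 @strlen(i8* %s)    ; readonly call
     // with no AliasSet in the tracker flagged Mod, allows the call to be
     // hoisted to the preheader; the scan below looks for any modifying set.)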
bool FoundMod = false; - for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); - I != E; ++I) { - AliasSet &AS = *I; + for (AliasSet &AS : *CurAST) { if (!AS.isForwardingAliasSet() && AS.isMod()) { FoundMod = true; break; @@ -783,8 +775,8 @@ static bool isGuaranteedToExecute(const Instruction &Inst, CurLoop->getExitBlocks(ExitBlocks); // Verify that the block dominates each of the exit blocks of the loop. - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (!DT->dominates(Inst.getParent(), ExitBlocks[i])) + for (BasicBlock *ExitBlock : ExitBlocks) + if (!DT->dominates(Inst.getParent(), ExitBlock)) return false; // As a degenerate case, if the loop is statically infinite then we haven't @@ -951,17 +943,17 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, // If there is an non-load/store instruction in the loop, we can't promote // it. - if (const LoadInst *load = dyn_cast<LoadInst>(UI)) { - assert(!load->isVolatile() && "AST broken"); - if (!load->isSimple()) + if (const LoadInst *Load = dyn_cast<LoadInst>(UI)) { + assert(!Load->isVolatile() && "AST broken"); + if (!Load->isSimple()) return Changed; - } else if (const StoreInst *store = dyn_cast<StoreInst>(UI)) { + } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (UI->getOperand(1) != ASIV) continue; - assert(!store->isVolatile() && "AST broken"); - if (!store->isSimple()) + assert(!Store->isVolatile() && "AST broken"); + if (!Store->isSimple()) return Changed; // Don't sink stores from loops without dedicated block exits. Exits // containing indirect branches are not transformed by loop simplify, @@ -979,7 +971,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, // restrictive (and performant) alignment and if we are sure this // instruction will be executed, update the alignment. // Larger is better, with the exception of 0 being the best alignment. - unsigned InstAlignment = store->getAlignment(); + unsigned InstAlignment = Store->getAlignment(); if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0) if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) { GuaranteedToExecute = true; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index bc00ff3..7b1940b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -245,7 +245,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) { loopInfo.removeBlock(BB); // The last step is to update LoopInfo now that we've eliminated this loop. 
-  loopInfo.updateUnloop(L);
+  loopInfo.markAsRemoved(L);
 
   Changed = true;
   ++NumDeleted;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 56ae5c0..ecef6db 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -39,16 +39,16 @@ using namespace llvm;
 #define DEBUG_TYPE "loop-unroll"
 
 static cl::opt<unsigned>
-    UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
+    UnrollThreshold("unroll-threshold", cl::Hidden,
                     cl::desc("The baseline cost threshold for loop unrolling"));
 
 static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
-    "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden,
+    "unroll-percent-dynamic-cost-saved-threshold", cl::Hidden,
    cl::desc("The percentage of estimated dynamic cost which must be saved by "
             "unrolling to allow unrolling up to the max threshold."));
 
 static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
-    "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden,
+    "unroll-dynamic-cost-savings-discount", cl::Hidden,
     cl::desc("This is the amount discounted from the total unroll cost when "
              "the unrolled form has a high dynamic cost savings (triggered by "
            "the '-unroll-percent-dynamic-cost-saved-threshold' flag.)"));
 
@@ -59,17 +59,17 @@ static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
    "iterations when checking full unroll profitability"));
 
 static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+UnrollCount("unroll-count", cl::Hidden,
   cl::desc("Use this unroll count for all loops including those with "
            "unroll_count pragma values, for testing purposes"));
 
 static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
   cl::desc("Allows loops to be partially unrolled until "
            "-unroll-threshold loop size is reached."));
 
 static cl::opt<bool>
-UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
   cl::desc("Unroll loops with run-time trip counts"));
 
 static cl::opt<unsigned>
@@ -77,182 +77,95 @@ PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
   cl::desc("Unrolled size limit for loops with an unroll(full) or "
            "unroll_count pragma."));
 
-namespace {
-  class LoopUnroll : public LoopPass {
-  public:
-    static char ID; // Pass ID, replacement for typeid
-    LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
-      CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
-      CurrentPercentDynamicCostSavedThreshold =
-          UnrollPercentDynamicCostSavedThreshold;
-      CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
-      CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
-      CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
-      CurrentRuntime = (R == -1) ?
UnrollRuntime : (bool)R; - - UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); - UserPercentDynamicCostSavedThreshold = - (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0); - UserDynamicCostSavingsDiscount = - (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0); - UserAllowPartial = (P != -1) || - (UnrollAllowPartial.getNumOccurrences() > 0); - UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0); - UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0); - - initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); - } - /// A magic value for use with the Threshold parameter to indicate - /// that the loop unroll should be performed regardless of how much - /// code expansion would result. - static const unsigned NoThreshold = UINT_MAX; - - // Threshold to use when optsize is specified (and there is no - // explicit -unroll-threshold). - static const unsigned OptSizeUnrollThreshold = 50; - - // Default unroll count for loops with run-time trip count if - // -unroll-count is not set - static const unsigned UnrollRuntimeCount = 8; - - unsigned CurrentCount; - unsigned CurrentThreshold; - unsigned CurrentPercentDynamicCostSavedThreshold; - unsigned CurrentDynamicCostSavingsDiscount; - bool CurrentAllowPartial; - bool CurrentRuntime; - - // Flags for whether the 'current' settings are user-specified. - bool UserCount; - bool UserThreshold; - bool UserPercentDynamicCostSavedThreshold; - bool UserDynamicCostSavingsDiscount; - bool UserAllowPartial; - bool UserRuntime; - - bool runOnLoop(Loop *L, LPPassManager &) override; - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG... - /// - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addPreservedID(LCSSAID); - AU.addRequired<ScalarEvolutionWrapperPass>(); - AU.addPreserved<ScalarEvolutionWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. - // If loop unroll does not preserve dom info then LCSSA pass on next - // loop will receive invalid dom info. - // For now, recreate dom info, if loop is unrolled. - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } +/// A magic value for use with the Threshold parameter to indicate +/// that the loop unroll should be performed regardless of how much +/// code expansion would result. +static const unsigned NoThreshold = UINT_MAX; - // Fill in the UnrollingPreferences parameter with values from the - // TargetTransformationInfo. 
- void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI, - TargetTransformInfo::UnrollingPreferences &UP) { - UP.Threshold = CurrentThreshold; - UP.PercentDynamicCostSavedThreshold = - CurrentPercentDynamicCostSavedThreshold; - UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount; - UP.OptSizeThreshold = OptSizeUnrollThreshold; - UP.PartialThreshold = CurrentThreshold; - UP.PartialOptSizeThreshold = OptSizeUnrollThreshold; - UP.Count = CurrentCount; - UP.MaxCount = UINT_MAX; - UP.Partial = CurrentAllowPartial; - UP.Runtime = CurrentRuntime; - UP.AllowExpensiveTripCount = false; - TTI.getUnrollingPreferences(L, UP); - } +/// Default unroll count for loops with run-time trip count if +/// -unroll-count is not set +static const unsigned DefaultUnrollRuntimeCount = 8; - // Select and return an unroll count based on parameters from - // user, unroll preferences, unroll pragmas, or a heuristic. - // SetExplicitly is set to true if the unroll count is is set by - // the user or a pragma rather than selected heuristically. - unsigned - selectUnrollCount(const Loop *L, unsigned TripCount, bool PragmaFullUnroll, - unsigned PragmaCount, - const TargetTransformInfo::UnrollingPreferences &UP, - bool &SetExplicitly); - - // Select threshold values used to limit unrolling based on a - // total unrolled size. Parameters Threshold and PartialThreshold - // are set to the maximum unrolled size for fully and partially - // unrolled loops respectively. - void selectThresholds(const Loop *L, bool UsePragmaThreshold, - const TargetTransformInfo::UnrollingPreferences &UP, - unsigned &Threshold, unsigned &PartialThreshold, - unsigned &PercentDynamicCostSavedThreshold, - unsigned &DynamicCostSavingsDiscount) { - // Determine the current unrolling threshold. While this is - // normally set from UnrollThreshold, it is overridden to a - // smaller value if the current function is marked as - // optimize-for-size, and the unroll threshold was not user - // specified. - Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; - PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold; - PercentDynamicCostSavedThreshold = - UserPercentDynamicCostSavedThreshold - ? CurrentPercentDynamicCostSavedThreshold - : UP.PercentDynamicCostSavedThreshold; - DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount - ? CurrentDynamicCostSavingsDiscount - : UP.DynamicCostSavingsDiscount; - - if (!UserThreshold && - // FIXME: Use Function::optForSize(). - L->getHeader()->getParent()->hasFnAttribute( - Attribute::OptimizeForSize)) { - Threshold = UP.OptSizeThreshold; - PartialThreshold = UP.PartialOptSizeThreshold; - } - if (UsePragmaThreshold) { - // If the loop has an unrolling pragma, we want to be more - // aggressive with unrolling limits. Set thresholds to at - // least the PragmaTheshold value which is larger than the - // default limits. - if (Threshold != NoThreshold) - Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold); - if (PartialThreshold != NoThreshold) - PartialThreshold = - std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold); - } - } - bool canUnrollCompletely(Loop *L, unsigned Threshold, - unsigned PercentDynamicCostSavedThreshold, - unsigned DynamicCostSavingsDiscount, - uint64_t UnrolledCost, uint64_t RolledDynamicCost); - }; -} +/// Gather the various unrolling parameters based on the defaults, compiler +/// flags, TTI overrides, pragmas, and user specified parameters. 
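+///
+/// Illustration (editor's sketch, not normative): the layers below apply in
+/// order - defaults, then TTI overrides, size attributes, pragmas, cl::opt
+/// flags, and finally the explicitly provided parameters - so a loop under
+/// "#pragma clang loop unroll_count(4)" ends up with UP.Count == 4 unless
+/// -unroll-count or a constructor-supplied count overrides it.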
+static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
+    Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
+    Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
+    Optional<bool> UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll,
+    bool PragmaEnableUnroll, unsigned TripCount) {
+  TargetTransformInfo::UnrollingPreferences UP;
 
-char LoopUnroll::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+  // Set up the defaults
+  UP.Threshold = 150;
+  UP.PercentDynamicCostSavedThreshold = 20;
+  UP.DynamicCostSavingsDiscount = 2000;
+  UP.OptSizeThreshold = 50;
+  UP.PartialThreshold = UP.Threshold;
+  UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
+  UP.Count = 0;
+  UP.MaxCount = UINT_MAX;
+  UP.Partial = false;
+  UP.Runtime = false;
+  UP.AllowExpensiveTripCount = false;
+
+  // Override with any target specific settings
+  TTI.getUnrollingPreferences(L, UP);
+
+  // Apply size attributes
+  if (L->getHeader()->getParent()->optForSize()) {
+    UP.Threshold = UP.OptSizeThreshold;
+    UP.PartialThreshold = UP.PartialOptSizeThreshold;
+  }
 
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
-                                 int Runtime) {
-  return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
-}
+  // Apply unroll count pragmas
+  if (PragmaCount)
+    UP.Count = PragmaCount;
+  else if (PragmaFullUnroll)
+    UP.Count = TripCount;
 
-Pass *llvm::createSimpleLoopUnrollPass() {
-  return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+  // Apply any user values specified by cl::opt
+  if (UnrollThreshold.getNumOccurrences() > 0) {
+    UP.Threshold = UnrollThreshold;
+    UP.PartialThreshold = UnrollThreshold;
+  }
+  if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0)
+    UP.PercentDynamicCostSavedThreshold =
+        UnrollPercentDynamicCostSavedThreshold;
+  if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
+    UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
+  if (UnrollCount.getNumOccurrences() > 0)
+    UP.Count = UnrollCount;
+  if (UnrollAllowPartial.getNumOccurrences() > 0)
+    UP.Partial = UnrollAllowPartial;
+  if (UnrollRuntime.getNumOccurrences() > 0)
+    UP.Runtime = UnrollRuntime;
+
+  // Apply user values provided by argument
+  if (UserThreshold.hasValue()) {
+    UP.Threshold = *UserThreshold;
+    UP.PartialThreshold = *UserThreshold;
+  }
+  if (UserCount.hasValue())
+    UP.Count = *UserCount;
+  if (UserAllowPartial.hasValue())
+    UP.Partial = *UserAllowPartial;
+  if (UserRuntime.hasValue())
+    UP.Runtime = *UserRuntime;
+
+  if (PragmaCount > 0 ||
+      ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) {
+    // If the loop has an unrolling pragma, we want to be more aggressive with
+    // unrolling limits. Set thresholds to at least the PragmaThreshold value,
+    // which is larger than the default limits.
+ if (UP.Threshold != NoThreshold) + UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold); + if (UP.PartialThreshold != NoThreshold) + UP.PartialThreshold = + std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold); + } + + return UP; } namespace { @@ -793,12 +706,11 @@ static void SetLoopAlreadyUnrolled(Loop *L) { L->setLoopID(NewLoopID); } -bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold, - unsigned PercentDynamicCostSavedThreshold, - unsigned DynamicCostSavingsDiscount, - uint64_t UnrolledCost, - uint64_t RolledDynamicCost) { - +static bool canUnrollCompletely(Loop *L, unsigned Threshold, + unsigned PercentDynamicCostSavedThreshold, + unsigned DynamicCostSavingsDiscount, + uint64_t UnrolledCost, + uint64_t RolledDynamicCost) { if (Threshold == NoThreshold) { DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n"); return true; @@ -846,60 +758,13 @@ bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold, return false; } -unsigned LoopUnroll::selectUnrollCount( - const Loop *L, unsigned TripCount, bool PragmaFullUnroll, - unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP, - bool &SetExplicitly) { - SetExplicitly = true; - - // User-specified count (either as a command-line option or - // constructor parameter) has highest precedence. - unsigned Count = UserCount ? CurrentCount : 0; - - // If there is no user-specified count, unroll pragmas have the next - // highest precedence. - if (Count == 0) { - if (PragmaCount) { - Count = PragmaCount; - } else if (PragmaFullUnroll) { - Count = TripCount; - } - } - - if (Count == 0) - Count = UP.Count; - - if (Count == 0) { - SetExplicitly = false; - if (TripCount == 0) - // Runtime trip count. - Count = UnrollRuntimeCount; - else - // Conservative heuristic: if we know the trip count, see if we can - // completely unroll (subject to the threshold, checked below); otherwise - // try to find greatest modulo of the trip count which is still under - // threshold value. 
- Count = TripCount; - } - if (TripCount && Count > TripCount) - return TripCount; - return Count; -} - -bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { - if (skipOptnoneFunction(L)) - return false; - - Function &F = *L->getHeader()->getParent(); - - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - const TargetTransformInfo &TTI = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - +static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, + ScalarEvolution *SE, const TargetTransformInfo &TTI, + AssumptionCache &AC, bool PreserveLCSSA, + Optional<unsigned> ProvidedCount, + Optional<unsigned> ProvidedThreshold, + Optional<bool> ProvidedAllowPartial, + Optional<bool> ProvidedRuntime) { BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() << "] Loop %" << Header->getName() << "\n"); @@ -912,9 +777,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { unsigned PragmaCount = UnrollCountPragmaValue(L); bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0; - TargetTransformInfo::UnrollingPreferences UP; - getUnrollingPreferences(L, TTI, UP); - // Find trip count and trip multiple if count is not available unsigned TripCount = 0; unsigned TripMultiple = 1; @@ -929,11 +791,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); } - // Select an initial unroll count. This may be reduced later based - // on size thresholds. - bool CountSetExplicitly; - unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll, - PragmaCount, UP, CountSetExplicitly); + TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( + L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, + ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll, + TripCount); + + unsigned Count = UP.Count; + bool CountSetExplicitly = Count != 0; + // Use a heuristic count if we didn't set anything explicitly. + if (!CountSetExplicitly) + Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount; + if (TripCount && Count > TripCount) + Count = TripCount; unsigned NumInlineCandidates; bool notDuplicatable; @@ -955,21 +824,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { return false; } - unsigned Threshold, PartialThreshold; - unsigned PercentDynamicCostSavedThreshold; - unsigned DynamicCostSavingsDiscount; - // Only use the high pragma threshold when we have a target unroll factor such - // as with "#pragma unroll N" or a pragma indicating full unrolling and the - // trip count is known. Otherwise we rely on the standard threshold to - // heuristically select a reasonable unroll count. - bool UsePragmaThreshold = - PragmaCount > 0 || - ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0); - - selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold, - PercentDynamicCostSavedThreshold, - DynamicCostSavingsDiscount); - // Given Count, TripCount and thresholds determine the type of // unrolling which is to be performed. 
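   // (Editor's illustration, hedged: with TripCount == 8 and Count == 8 the
   // loop is a candidate for Full unrolling; with Count == 4 it is Partial;
   // with an unknown trip count only Runtime unrolling remains.)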
enum { Full = 0, Partial = 1, Runtime = 2 }; @@ -977,19 +831,20 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { if (TripCount && Count == TripCount) { Unrolling = Partial; // If the loop is really small, we don't need to run an expensive analysis. - if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount, + if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount, UnrolledSize, UnrolledSize)) { Unrolling = Full; } else { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. - if (Optional<EstimatedUnrollCost> Cost = - analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI, - Threshold + DynamicCostSavingsDiscount)) - if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold, - DynamicCostSavingsDiscount, Cost->UnrolledCost, - Cost->RolledDynamicCost)) { + if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( + L, TripCount, DT, *SE, TTI, + UP.Threshold + UP.DynamicCostSavingsDiscount)) + if (canUnrollCompletely(L, UP.Threshold, + UP.PercentDynamicCostSavedThreshold, + UP.DynamicCostSavingsDiscount, + Cost->UnrolledCost, Cost->RolledDynamicCost)) { Unrolling = Full; } } @@ -1001,23 +856,22 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; - bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || - (UserRuntime ? CurrentRuntime : UP.Runtime); + bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime; // Don't unroll a runtime trip count loop with unroll full pragma. if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { AllowRuntime = false; } if (Unrolling == Partial) { - bool AllowPartial = PragmaEnableUnroll || - (UserAllowPartial ? CurrentAllowPartial : UP.Partial); + bool AllowPartial = PragmaEnableUnroll || UP.Partial; if (!AllowPartial && !CountSetExplicitly) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } - if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) { + if (UP.PartialThreshold != NoThreshold && + UnrolledSize > UP.PartialThreshold) { // Reduce unroll count to be modulo of TripCount for partial unrolling. - Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2); + Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2); while (Count != 0 && TripCount % Count != 0) Count--; } @@ -1029,7 +883,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { } // Reduce unroll count to be the largest power-of-two factor of // the original count which satisfies the threshold limit. 
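     // (Editor's illustration: with LoopSize == 10, UnrolledSize is
     // (10 - 2) * Count + 2, so under UP.PartialThreshold == 34 a Count of 16
     // is halved 16 -> 8 -> 4 until 8 * 4 + 2 = 34 fits the threshold.)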
- while (Count != 0 && UnrolledSize > PartialThreshold) { + while (Count != 0 && UnrolledSize > UP.PartialThreshold) { Count >>= 1; UnrolledSize = (LoopSize-2) * Count + 2; } @@ -1086,3 +940,91 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { return true; } + +namespace { +class LoopUnroll : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + LoopUnroll(Optional<unsigned> Threshold = None, + Optional<unsigned> Count = None, + Optional<bool> AllowPartial = None, Optional<bool> Runtime = None) + : LoopPass(ID), ProvidedCount(Count), ProvidedThreshold(Threshold), + ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime) { + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); + } + + Optional<unsigned> ProvidedCount; + Optional<unsigned> ProvidedThreshold; + Optional<bool> ProvidedAllowPartial; + Optional<bool> ProvidedRuntime; + + bool runOnLoop(Loop *L, LPPassManager &) override { + if (skipOptnoneFunction(L)) + return false; + + Function &F = *L->getHeader()->getParent(); + + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + + return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount, + ProvidedThreshold, ProvidedAllowPartial, + ProvidedRuntime); + } + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG... + /// + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. + // If loop unroll does not preserve dom info then LCSSA pass on next + // loop will receive invalid dom info. + // For now, recreate dom info, if loop is unrolled. + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; +} + +char LoopUnroll::ID = 0; +INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) + +Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial, + int Runtime) { + // TODO: It would make more sense for this function to take the optionals + // directly, but that's dangerous since it would silently break out of tree + // callers. + return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold), + Count == -1 ? 
None : Optional<unsigned>(Count),
+                        AllowPartial == -1 ? None
+                                           : Optional<bool>(AllowPartial),
+                        Runtime == -1 ? None : Optional<bool>(Runtime));
+}
+
+Pass *llvm::createSimpleLoopUnrollPass() {
+  return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 7354016..6b43b0f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -529,12 +529,13 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
 
       // We found an instruction that may write to the loaded memory.
       // We can try to promote at this position instead of the store
-      // position if nothing alias the store memory after this.
+      // position if nothing aliases the store memory after this and the store
+      // destination is not in the range.
       P = &*I;
       for (; I != E; ++I) {
         MemoryLocation StoreLoc = MemoryLocation::get(SI);
-        if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
-          DEBUG(dbgs() << "Alias " << *I << "\n");
+        if (&*I == SI->getOperand(1) ||
+            AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
           P = nullptr;
           break;
         }
@@ -628,13 +629,39 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   // Ensure that the value being stored is something that can be memset'able a
   // byte at a time like "0" or "-1" or any width, as well as things like
   // 0xA0A0A0A0 and 0.0.
-  if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+  auto *V = SI->getOperand(0);
+  if (Value *ByteVal = isBytewiseValue(V)) {
     if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                               ByteVal)) {
       BBI = I->getIterator(); // Don't invalidate iterator.
       return true;
     }
 
+    // If we have an aggregate, we try to promote it to memset regardless
+    // of opportunity for merging as it can expose optimization opportunities
+    // in subsequent passes.
+    auto *T = V->getType();
+    if (T->isAggregateType()) {
+      uint64_t Size = DL.getTypeStoreSize(T);
+      unsigned Align = SI->getAlignment();
+      if (!Align)
+        Align = DL.getABITypeAlignment(T);
+      IRBuilder<> Builder(SI);
+      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
+                                     Size, Align, SI->isVolatile());
+
+      DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
+
+      MD->removeInstruction(SI);
+      SI->eraseFromParent();
+      NumMemSetInfer++;
+
+      // Make sure we do not invalidate the iterator.
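+      // (Editor's illustration, hedged: this promotion turns e.g.
+      //    store %struct.S zeroinitializer, %struct.S* %p
+      // into a call to @llvm.memset.p0i8.i64 covering the full store size,
+      // which later passes can merge or lower more effectively.)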
+      BBI = M->getIterator();
+      return true;
+    }
+  }
+
   return false;
 }
 
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 28c610c..b56b355 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -499,7 +499,7 @@ static bool isGCSafepointPoll(Function &F) {
 static bool shouldRewriteFunction(Function &F) {
   // TODO: This should check the GCStrategy
   if (F.hasGC()) {
-    const char *FunctionGCName = F.getGC();
+    const auto &FunctionGCName = F.getGC();
     const StringRef StatepointExampleName("statepoint-example");
     const StringRef CoreCLRName("coreclr");
     return (StatepointExampleName == FunctionGCName) ||
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 401a740..bcadd4e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2155,7 +2155,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
       // During the initial run we will get to the root of the tree.
       // But if we get here while we are redoing instructions, there is no
      // guarantee that the root will be visited. So Redo later
-      if (BO->user_back() != BO)
+      if (BO->user_back() != BO &&
+          BO->getParent() == BO->user_back()->getParent())
         RedoInsts.insert(BO->user_back());
       return;
     }
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 5d253be..d77d574 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -78,6 +78,13 @@ static cl::opt<bool>
 AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                cl::Hidden, cl::init(true));
 
+/// Should we split vectors of pointers into their individual elements?  This
+/// is known to be buggy, but the alternate implementation isn't yet ready.
+/// This is purely to provide a debugging and diagnostic hook until the vector
+/// split is replaced with vector relocations.
+static cl::opt<bool> UseVectorSplit("rs4gc-split-vector-values", cl::Hidden,
+                                    cl::init(true));
+
 namespace {
 struct RewriteStatepointsForGC : public ModulePass {
   static char ID; // Pass identification, replacement for typeid
@@ -357,10 +364,6 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
 /// particular element in 'I'.
 static BaseDefiningValueResult
 findBaseDefiningValueOfVector(Value *I) {
-  assert(I->getType()->isVectorTy() &&
-         cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
-         "Illegal to ask for the base pointer of a non-pointer type");
-
   // Each case parallels findBaseDefiningValue below, see that code for
   // detailed motivation.
 
@@ -368,26 +371,10 @@ findBaseDefiningValueOfVector(Value *I) {
     // An incoming argument to the function is a base pointer
     return BaseDefiningValueResult(I, true);
 
-  // We shouldn't see the address of a global as a vector value?
-  assert(!isa<GlobalVariable>(I) &&
-         "unexpected global variable found in base of vector");
-
-  // inlining could possibly introduce phi node that contains
-  // undef if callee has multiple returns
-  if (isa<UndefValue>(I))
-    // utterly meaningless, but useful for dealing with partially optimized
-    // code.
+  if (isa<Constant>(I))
+    // Constant vectors consist only of constant pointers.
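+    // (Editor's illustration: e.g. <2 x i8 addrspace(1)*> zeroinitializer,
+    // or an undef vector, is its own base - every lane is a constant pointer
+    // the collector never needs to relocate.)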
return BaseDefiningValueResult(I, true); - // Due to inheritance, this must be _after_ the global variable and undef - // checks - if (Constant *Con = dyn_cast<Constant>(I)) { - assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && - "order of checks wrong!"); - assert(Con->isNullValue() && "null is the only case which makes sense"); - return BaseDefiningValueResult(Con, true); - } - if (isa<LoadInst>(I)) return BaseDefiningValueResult(I, true); @@ -417,11 +404,11 @@ findBaseDefiningValueOfVector(Value *I) { /// (i.e. a PHI or Select of two derived pointers), or c) involves a change /// from pointer to vector type or back. static BaseDefiningValueResult findBaseDefiningValue(Value *I) { + assert(I->getType()->isPtrOrPtrVectorTy() && + "Illegal to ask for the base pointer of a non-pointer type"); + if (I->getType()->isVectorTy()) return findBaseDefiningValueOfVector(I); - - assert(I->getType()->isPointerTy() && - "Illegal to ask for the base pointer of a non-pointer type"); if (isa<Argument>(I)) // An incoming argument to the function is a base pointer @@ -1310,18 +1297,29 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables, assert(Index < LiveVec.size() && "Bug in std::find?"); return Index; }; - - // All gc_relocate are set to i8 addrspace(1)* type. We originally generated - // unique declarations for each pointer type, but this proved problematic - // because the intrinsic mangling code is incomplete and fragile. Since - // we're moving towards a single unified pointer type anyways, we can just - // cast everything to an i8* of the right address space. A bitcast is added - // later to convert gc_relocate to the actual value's type. Module *M = StatepointToken->getModule(); - auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace(); - Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)}; - Value *GCRelocateDecl = - Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types); + + // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose + // element type is i8 addrspace(1)*). We originally generated unique + // declarations for each pointer type, but this proved problematic because + // the intrinsic mangling code is incomplete and fragile. Since we're moving + // towards a single unified pointer type anyways, we can just cast everything + // to an i8* of the right address space. A bitcast is added later to convert + // gc_relocate to the actual value's type. + auto getGCRelocateDecl = [&] (Type *Ty) { + assert(isHandledGCPointerType(Ty)); + auto AS = Ty->getScalarType()->getPointerAddressSpace(); + Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS); + if (auto *VT = dyn_cast<VectorType>(Ty)) + NewTy = VectorType::get(NewTy, VT->getNumElements()); + return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, + {NewTy}); + }; + + // Lazily populated map from input types to the canonicalized form mentioned + // in the comment above. This should probably be cached somewhere more + // broadly. 
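+  // (Editor's illustration, hedged: a live value of type
+  // <4 x %obj addrspace(1)*> - %obj being a hypothetical type - maps to a
+  // gc.relocate declaration returning <4 x i8 addrspace(1)*>, while a scalar
+  // pointer maps to one returning i8 addrspace(1)*.)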
+  DenseMap<Type*, Value*> TypeToDeclMap;
 
   for (unsigned i = 0; i < LiveVariables.size(); i++) {
     // Generate the gc.relocate call and save the result
@@ -1329,6 +1327,11 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
         Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
     Value *LiveIdx = Builder.getInt32(LiveStart + i);
 
+    Type *Ty = LiveVariables[i]->getType();
+    if (!TypeToDeclMap.count(Ty))
+      TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
+    Value *GCRelocateDecl = TypeToDeclMap[Ty];
+
     // only specify a debug name if we can give a useful one
     CallInst *Reloc = Builder.CreateCall(
         GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
@@ -2380,15 +2383,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
 
   // Do a limited scalarization of any live at safepoint vector values which
   // contain pointers.  This enables this pass to run after vectorization at
-  // the cost of some possible performance loss.  TODO: it would be nice to
-  // natively support vectors all the way through the backend so we don't need
-  // to scalarize here.
-  for (size_t i = 0; i < Records.size(); i++) {
-    PartiallyConstructedSafepointRecord &Info = Records[i];
-    Instruction *Statepoint = ToUpdate[i].getInstruction();
-    splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
-                      Info.PointerToBase, DT);
-  }
+  // the cost of some possible performance loss.  Note: this is known not to
+  // handle updating of the side tables correctly, which can lead to
+  // relocation bugs when the same vector is live at multiple statepoints.
+  // We're in the process of implementing the alternate lowering - relocating
+  // the vector-of-pointers as a first-class item and updating the backend to
+  // understand that - but that's not yet complete.
+  if (UseVectorSplit)
+    for (size_t i = 0; i < Records.size(); i++) {
+      PartiallyConstructedSafepointRecord &Info = Records[i];
+      Instruction *Statepoint = ToUpdate[i].getInstruction();
+      splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
+                        Info.PointerToBase, DT);
+    }
 
   // In order to reduce live set of statepoint we might choose to rematerialize
   // some values instead of relocating them.
This is purely an optimization and @@ -2467,7 +2474,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, #ifndef NDEBUG // sanity check for (auto *Ptr : Live) - assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type"); + assert(isHandledGCPointerType(Ptr->getType()) && + "must be a gc pointer type"); #endif relocationViaAlloca(F, DT, Live, Records); @@ -2547,7 +2555,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { static bool shouldRewriteStatepointsIn(Function &F) { // TODO: This should check the GCStrategy if (F.hasGC()) { - const char *FunctionGCName = F.getGC(); + const auto &FunctionGCName = F.getGC(); const StringRef StatepointExampleName("statepoint-example"); const StringRef CoreCLRName("coreclr"); return (StatepointExampleName == FunctionGCName) || diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index 2fca803..8569e08 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -757,9 +757,14 @@ void SCCPSolver::visitCastInst(CastInst &I) { LatticeVal OpSt = getValueState(I.getOperand(0)); if (OpSt.isOverdefined()) // Inherit overdefinedness of operand markOverdefined(&I); - else if (OpSt.isConstant()) // Propagate constant value - markConstant(&I, ConstantExpr::getCast(I.getOpcode(), - OpSt.getConstant(), I.getType())); + else if (OpSt.isConstant()) { + Constant *C = + ConstantExpr::getCast(I.getOpcode(), OpSt.getConstant(), I.getType()); + if (isa<UndefValue>(C)) + return; + // Propagate constant value + markConstant(&I, C); + } } @@ -859,10 +864,14 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { LatticeVal &IV = ValueState[&I]; if (IV.isOverdefined()) return; - if (V1State.isConstant() && V2State.isConstant()) - return markConstant(IV, &I, - ConstantExpr::get(I.getOpcode(), V1State.getConstant(), - V2State.getConstant())); + if (V1State.isConstant() && V2State.isConstant()) { + Constant *C = ConstantExpr::get(I.getOpcode(), V1State.getConstant(), + V2State.getConstant()); + // X op Y -> undef. + if (isa<UndefValue>(C)) + return; + return markConstant(IV, &I, C); + } // If something is undef, wait for it to resolve. if (!V1State.isOverdefined() && !V2State.isOverdefined()) @@ -917,10 +926,13 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { LatticeVal &IV = ValueState[&I]; if (IV.isOverdefined()) return; - if (V1State.isConstant() && V2State.isConstant()) - return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(), - V1State.getConstant(), - V2State.getConstant())); + if (V1State.isConstant() && V2State.isConstant()) { + Constant *C = ConstantExpr::getCompare( + I.getPredicate(), V1State.getConstant(), V2State.getConstant()); + if (isa<UndefValue>(C)) + return; + return markConstant(IV, &I, C); + } // If operands are still undefined, wait for it to resolve. 
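   // (Editor's illustration: e.g. icmp eq i32 undef, 0 constant-folds to
   // undef; marking the result constant here would pin the lattice before
   // ResolvedUndefsIn has a chance to pick a value, hence the early return
   // above.)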
if (!V1State.isOverdefined() && !V2State.isOverdefined()) @@ -1020,8 +1032,11 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { Constant *Ptr = Operands[0]; auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end()); - markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, - Indices)); + Constant *C = + ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices); + if (isa<UndefValue>(C)) + return; + markConstant(&I, C); } void SCCPSolver::visitStoreInst(StoreInst &SI) { @@ -1061,9 +1076,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) { Constant *Ptr = PtrVal.getConstant(); - // load null -> null + // load null is undefined. if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0) - return markConstant(IV, &I, UndefValue::get(I.getType())); + return; // Transform load (constant global) into the value loaded. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { @@ -1079,8 +1094,11 @@ void SCCPSolver::visitLoadInst(LoadInst &I) { } // Transform load from a constant into a constant if possible. - if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) + if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) { + if (isa<UndefValue>(C)) + return; return markConstant(IV, &I, C); + } // Otherwise we cannot say for certain what value this load will produce. // Bail out. @@ -1122,8 +1140,12 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands, TLI)) + if (Constant *C = ConstantFoldCall(F, Operands, TLI)) { + // call -> undef. + if (isa<UndefValue>(C)) + return; return markConstant(I, C); + } } // Otherwise, we don't know anything about this call, mark it overdefined. @@ -1379,6 +1401,11 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // X % undef -> undef. No change. if (Op1LV.isUndefined()) break; + // X / 0 -> undef. No change. + // X % 0 -> undef. No change. + if (Op1LV.isConstant() && Op1LV.getConstant()->isZeroValue()) + break; + // undef / X -> 0. X could be maxint. // undef % X -> 0. X could be 1. markForcedConstant(&I, Constant::getNullValue(ITy)); diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 0e0b00d..4e84d72 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -425,9 +425,7 @@ bool TailCallElim::runTRE(Function &F) { // with themselves. Check to see if we did and clean up our mess if so. This // occurs when a function passes an argument straight through to its tail // call. - for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) { - PHINode *PN = ArgumentPHIs[i]; - + for (PHINode *PN : ArgumentPHIs) { // If the PHI Node is a dynamic constant, replace it with the value it is. if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) { PN->replaceAllUsesWith(PNV); @@ -468,10 +466,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // return value of the call, it must only use things that are defined before // the call, or movable instructions between the call and the instruction // itself. 
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (I->getOperand(i) == CI) - return false; - return true; + return std::find(I->op_begin(), I->op_end(), CI) == I->op_end(); } /// Return true if the specified value is the same when the return would exit diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index a5137e9..72db980 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -626,11 +626,17 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, Clone2->setName(Twine("lpad") + Suffix2); NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); - // Create a PHI node for the two cloned landingpad instructions. - PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); - PN->addIncoming(Clone1, NewBB1); - PN->addIncoming(Clone2, NewBB2); - LPad->replaceAllUsesWith(PN); + // Create a PHI node for the two cloned landingpad instructions only + // if the original landingpad instruction has some uses. + if (!LPad->use_empty()) { + assert(!LPad->getType()->isTokenTy() && + "Split cannot be applied if LPad is token type. Otherwise an " + "invalid PHINode of token type would be created."); + PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); + PN->addIncoming(Clone1, NewBB1); + PN->addIncoming(Clone2, NewBB2); + LPad->replaceAllUsesWith(PN); + } LPad->eraseFromParent(); } else { // There is no second clone. Just replace the landing pad with the first diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 854a3b8..6454afb 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -266,27 +266,14 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - CloningDirector *Director; - ValueMapTypeRemapper *TypeMapper; - ValueMaterializer *Materializer; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, - const char *nameSuffix, ClonedCodeInfo *codeInfo, - CloningDirector *Director) + const char *nameSuffix, ClonedCodeInfo *codeInfo) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), - CodeInfo(codeInfo), Director(Director) { - // These are optional components. The Director may return null. - if (Director) { - TypeMapper = Director->getTypeRemapper(); - Materializer = Director->getValueMaterializer(); - } else { - TypeMapper = nullptr; - Materializer = nullptr; - } - } + CodeInfo(codeInfo) {} /// The specified block is found to be reachable, clone it and /// anything that it can reach. @@ -332,23 +319,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // loop doesn't include the terminator. for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; ++II) { - // If the "Director" remaps the instruction, don't clone it. - if (Director) { - CloningDirector::CloningAction Action = - Director->handleInstruction(VMap, &*II, NewBB); - // If the cloning director says stop, we want to stop everything, not - // just break out of the loop (which would cause the terminator to be - // cloned). The cloning director is responsible for inserting a proper - // terminator into the new basic block in this case. 
- if (Action == CloningDirector::StopCloningBB) - return; - // If the cloning director says skip, continue to the next instruction. - // In this case, the cloning director is responsible for mapping the - // skipped instruction to some value that is defined in the new - // basic block. - if (Action == CloningDirector::SkipInstruction) - continue; - } Instruction *NewInst = II->clone(); @@ -356,8 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // nodes for which we defer processing until we update the CFG. if (!isa<PHINode>(NewInst)) { RemapInstruction(NewInst, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into @@ -397,26 +366,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Finally, clone over the terminator. const TerminatorInst *OldTI = BB->getTerminator(); bool TerminatorDone = false; - if (Director) { - CloningDirector::CloningAction Action - = Director->handleInstruction(VMap, OldTI, NewBB); - // If the cloning director says stop, we want to stop everything, not - // just break out of the loop (which would cause the terminator to be - // cloned). The cloning director is responsible for inserting a proper - // terminator into the new basic block in this case. - if (Action == CloningDirector::StopCloningBB) - return; - if (Action == CloningDirector::CloneSuccessors) { - // If the director says to skip with a terminate instruction, we still - // need to clone this block's successors. - const TerminatorInst *TI = NewBB->getTerminator(); - for (const BasicBlock *Succ : TI->successors()) - ToClone.push_back(Succ); - return; - } - assert(Action != CloningDirector::SkipInstruction && - "SkipInstruction is not valid for terminators."); - } if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { if (BI->isConditional()) { // If the condition was a known constant in the callee... @@ -485,19 +434,13 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo, - CloningDirector *Director) { + const char *NameSuffix, + ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; ValueMaterializer *Materializer = nullptr; - if (Director) { - TypeMapper = Director->getTypeRemapper(); - Materializer = Director->getValueMaterializer(); - } - #ifndef NDEBUG // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. 
@@ -507,7 +450,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
 #endif
 
   PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
-                            NameSuffix, CodeInfo, Director);
+                            NameSuffix, CodeInfo);
   const BasicBlock *StartingBB;
   if (StartingInst)
     StartingBB = StartingInst->getParent();
@@ -731,8 +674,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
                              ClonedCodeInfo *CodeInfo,
                              Instruction *TheCall) {
   CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
-                            ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
-                            nullptr);
+                            ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
 }
 
 /// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 0e386ac..d2793e5 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
@@ -1051,9 +1052,31 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
     ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
   if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
     ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
-  if (ExtendedArg)
-    Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+  if (ExtendedArg) {
+    // We're now only describing a subset of the variable. The piece we're
+    // describing will always be smaller than the variable size, because
+    // VariableSize == Size of Alloca described by DDI. Since SI stores
+    // to the alloca described by DDI, if its first operand is an extend,
+    // we're guaranteed that before extension, the value was narrower than
+    // the size of the alloca, hence the size of the described variable.
+    SmallVector<uint64_t, 3> NewDIExpr;
+    unsigned PieceOffset = 0;
+    // If this already is a bit piece, we drop the bit piece from the expression
+    // and record the offset.
+    if (DIExpr->isBitPiece()) {
+      NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
+      PieceOffset = DIExpr->getBitPieceOffset();
+    } else {
+      NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
+    }
+    NewDIExpr.push_back(dwarf::DW_OP_bit_piece);
+    NewDIExpr.push_back(PieceOffset); // Offset
+    const DataLayout &DL = DDI->getModule()->getDataLayout();
+    NewDIExpr.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size
+    Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar,
+                                    Builder.createExpression(NewDIExpr),
                                     DDI->getDebugLoc(), SI);
+  } else
     Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
                                     DDI->getDebugLoc(), SI);
@@ -1407,7 +1430,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB) {
 /// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
 /// if they are in a dead cycle.  Return true if a change was made, false
 /// otherwise.
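+/// (Editor's sketch of the new LVI parameter, hedged: a pass that keeps a
+/// LazyValueInfo cache can now call removeUnreachableBlocks(F, LVI) so each
+/// deleted block is purged via LVI->eraseBlock(); passing nullptr preserves
+/// the old behavior.)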
-bool llvm::removeUnreachableBlocks(Function &F) { +bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { SmallPtrSet<BasicBlock*, 128> Reachable; bool Changed = markAliveBlocks(F, Reachable); @@ -1428,6 +1451,8 @@ bool llvm::removeUnreachableBlocks(Function &F) { ++SI) if (Reachable.count(*SI)) (*SI)->removePredecessor(&*BB); + if (LVI) + LVI->eraseBlock(&*BB); BB->dropAllReferences(); } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 2499b88..eea9237 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -528,7 +528,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, Loop *OuterL = L->getParentLoop(); // Update LoopInfo if the loop is completely removed. if (CompletelyUnroll) - LI->updateUnloop(L);; + LI->markAsRemoved(L); // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify @@ -542,7 +542,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after - // LoopInfo's been updated by updateUnloop. + // LoopInfo's been updated by markAsRemoved. Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp index e038805..fa958e9 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -599,7 +599,7 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, IRBuilder<>::FastMathFlagGuard FMFG(Builder); FastMathFlags FMF; FMF.setUnsafeAlgebra(); - Builder.SetFastMathFlags(FMF); + Builder.setFastMathFlags(FMF); Value *Cmp; if (RK == MRK_FloatMin || RK == MRK_FloatMax) diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3bb3fa5..3125a2c 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -141,6 +141,8 @@ class SimplifyCFGOpt { bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool SimplifySingleResume(ResumeInst *RI); + bool SimplifyCommonResume(ResumeInst *RI); bool SimplifyCleanupReturn(CleanupReturnInst *RI); bool SimplifyUnreachable(UnreachableInst *UI); bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); @@ -3239,14 +3241,101 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, } bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { - // If this is a trivial landing pad that just continues unwinding the caught - // exception then zap the landing pad, turning its invokes into calls. + if (isa<PHINode>(RI->getValue())) + return SimplifyCommonResume(RI); + else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) && + RI->getValue() == RI->getParent()->getFirstNonPHI()) + // The resume must unwind the exception that caused control to branch here. + return SimplifySingleResume(RI); + + return false; +} + +// Simplify resume that is shared by several landing pads (phi of landing pad). 
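// In CFG terms: several landing pads each branch straight to one shared block,
// whose phi merges their landingpad values into this resume; for every such
// trivial incoming block, the invokes that unwind to it can become plain calls.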
+bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { + BasicBlock *BB = RI->getParent(); + + // Check that there are no other instructions except for debug intrinsics + // between the phi of landing pads (RI->getValue()) and resume instruction. + BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(), + E = RI->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + SmallSet<BasicBlock *, 4> TrivialUnwindBlocks; + auto *PhiLPInst = cast<PHINode>(RI->getValue()); + + // Check incoming blocks to see if any of them are trivial. + for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); + Idx != End; Idx++) { + auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx); + auto *IncomingValue = PhiLPInst->getIncomingValue(Idx); + + // If the block has other successors, we cannot delete it because + // it has other dependents. + if (IncomingBB->getUniqueSuccessor() != BB) + continue; + + auto *LandingPad = + dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI()); + // Not the landing pad that caused control to branch here. + if (IncomingValue != LandingPad) + continue; + + bool isTrivial = true; + + I = IncomingBB->getFirstNonPHI()->getIterator(); + E = IncomingBB->getTerminator()->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) { + isTrivial = false; + break; + } + + if (isTrivial) + TrivialUnwindBlocks.insert(IncomingBB); + } + + // If no trivial unwind blocks, don't do any simplifications. + if (TrivialUnwindBlocks.empty()) return false; + + // Turn all invokes that unwind here into calls. + for (auto *TrivialBB : TrivialUnwindBlocks) { + // Blocks that will be simplified should be removed from the phi node. + // Note there could be multiple edges to the resume block, and we need + // to remove them all. + while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1) + BB->removePredecessor(TrivialBB, true); + + for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB); + PI != PE;) { + BasicBlock *Pred = *PI++; + removeUnwindEdge(Pred); + } + + // In each SimplifyCFG run, only the currently processed block can be erased. + // Otherwise, it would break the iteration of the SimplifyCFG pass. So instead + // of erasing TrivialBB, we only remove the branch to the common resume + // block so that we can later erase the resume block since it has no + // predecessors. + TrivialBB->getTerminator()->eraseFromParent(); + new UnreachableInst(RI->getContext(), TrivialBB); + } + + // Delete the resume block if all its predecessors have been removed. + if (pred_empty(BB)) + BB->eraseFromParent(); + + return !TrivialUnwindBlocks.empty(); +} + +// Simplify resume that is only used by a single (non-phi) landing pad. +bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); - if (RI->getValue() != LPInst) - // Not a landing pad, or the resume is not unwinding the exception that - // caused control to branch here. - return false; + assert(RI->getValue() == LPInst && + "Resume must unwind the exception that caused control to branch here"); // Check that there are no other instructions except for debug intrinsics.
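// (i.e. the landing pad must feed the resume directly; the same scan as in
// SimplifyCommonResume above, just without the phi indirection.)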
BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator(); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index dc5fee5..dc07440 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -997,7 +997,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(CI->getFastMathFlags()); + B.setFastMathFlags(CI->getFastMathFlags()); // floor((double)floatval) -> (double)floorf(floatval) if (Callee->isIntrinsic()) { @@ -1035,7 +1035,7 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(CI->getFastMathFlags()); + B.setFastMathFlags(CI->getFastMathFlags()); // fmin((double)floatval1, (double)floatval2) // -> (double)fminf(floatval1, floatval2) @@ -1127,29 +1127,26 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Callee->getAttributes()); } + // FIXME: Use instruction-level FMF. bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent()); - // pow(exp(x), y) -> exp(x*y) + // pow(exp(x), y) -> exp(x * y) // pow(exp2(x), y) -> exp2(x * y) - // We enable these only under fast-math. Besides rounding - // differences the transformation changes overflow and - // underflow behavior quite dramatically. + // We enable these only with fast-math. Besides rounding differences, the + // transformation changes overflow and underflow behavior quite dramatically. // Example: x = 1000, y = 0.001. // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). - if (UnsafeFPMath) { - if (auto *OpC = dyn_cast<CallInst>(Op1)) { + auto *OpC = dyn_cast<CallInst>(Op1); + if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) { + LibFunc::Func Func; + Function *OpCCallee = OpC->getCalledFunction(); + if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && + TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) { IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF; - FMF.setUnsafeAlgebra(); - B.SetFastMathFlags(FMF); - - LibFunc::Func Func; - Function *OpCCallee = OpC->getCalledFunction(); - if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && - TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) - return EmitUnaryFloatFnCall( - B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"), - OpCCallee->getName(), B, OpCCallee->getAttributes()); + B.setFastMathFlags(CI->getFastMathFlags()); + Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); + return EmitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, + OpCCallee->getAttributes()); } } @@ -1167,9 +1164,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { LibFunc::fabsl)) { // In -ffast-math, pow(x, 0.5) -> sqrt(x). - if (UnsafeFPMath) + if (CI->hasUnsafeAlgebra()) { + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, Callee->getAttributes()); + } // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). 
// This is faster than calling pow, and still handles negative zero @@ -1328,7 +1328,7 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { FMF.setNoSignedZeros(); FMF.setNoNaNs(); } - B.SetFastMathFlags(FMF); + B.setFastMathFlags(FMF); // We have a relaxed floating-point environment. We can ignore NaN-handling // and transform to a compare and select. We do not have to consider errno or @@ -1354,11 +1354,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { !FT->getParamType(0)->isFloatingPointTy()) return Ret; - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + if (!CI->hasUnsafeAlgebra()) return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); - if (!OpC) + + // The earlier call must also be unsafe in order to do these transforms. + if (!OpC || !OpC->hasUnsafeAlgebra()) return Ret; // log(pow(x,y)) -> y*log(x) @@ -1369,7 +1371,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; FMF.setUnsafeAlgebra(); - B.SetFastMathFlags(FMF); + B.setFastMathFlags(FMF); LibFunc::Func Func; Function *F = OpC->getCalledFunction(); @@ -1397,66 +1399,67 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + + if (!CI->hasUnsafeAlgebra()) return Ret; - Value *Op = CI->getArgOperand(0); - if (Instruction *I = dyn_cast<Instruction>(Op)) { - if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) { - // We're looking for a repeated factor in a multiplication tree, - // so we can do this fold: sqrt(x * x) -> fabs(x); - // or this fold: sqrt(x * x * y) -> fabs(x) * sqrt(y). - Value *Op0 = I->getOperand(0); - Value *Op1 = I->getOperand(1); - Value *RepeatOp = nullptr; - Value *OtherOp = nullptr; - if (Op0 == Op1) { - // Simple match: the operands of the multiply are identical. - RepeatOp = Op0; - } else { - // Look for a more complicated pattern: one of the operands is itself - // a multiply, so search for a common factor in that multiply. - // Note: We don't bother looking any deeper than this first level or for - // variations of this pattern because instcombine's visitFMUL and/or the - // reassociation pass should give us this form. - Value *OtherMul0, *OtherMul1; - if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { - // Pattern: sqrt((x * y) * z) - if (OtherMul0 == OtherMul1) { - // Matched: sqrt((x * x) * z) - RepeatOp = OtherMul0; - OtherOp = Op1; - } - } - } - if (RepeatOp) { - // Fast math flags for any created instructions should match the sqrt - // and multiply. - // FIXME: We're not checking the sqrt because it doesn't have - // fast-math-flags (see earlier comment). - IRBuilder<>::FastMathFlagGuard Guard(B); - B.SetFastMathFlags(I->getFastMathFlags()); - // If we found a repeated factor, hoist it out of the square root and - // replace it with the fabs of that factor. - Module *M = Callee->getParent(); - Type *ArgType = Op->getType(); - Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); - Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); - if (OtherOp) { - // If we found a non-repeated factor, we still need to get its square - // root. We then multiply that by the value that was simplified out - // of the square root calculation. 
- Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); - Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); - return B.CreateFMul(FabsCall, SqrtCall); - } - return FabsCall; + Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); + if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + return Ret; + + // We're looking for a repeated factor in a multiplication tree, + // so we can do this fold: sqrt(x * x) -> fabs(x); + // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y). + Value *Op0 = I->getOperand(0); + Value *Op1 = I->getOperand(1); + Value *RepeatOp = nullptr; + Value *OtherOp = nullptr; + if (Op0 == Op1) { + // Simple match: the operands of the multiply are identical. + RepeatOp = Op0; + } else { + // Look for a more complicated pattern: one of the operands is itself + // a multiply, so search for a common factor in that multiply. + // Note: We don't bother looking any deeper than this first level or for + // variations of this pattern because instcombine's visitFMUL and/or the + // reassociation pass should give us this form. + Value *OtherMul0, *OtherMul1; + if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { + // Pattern: sqrt((x * y) * z) + if (OtherMul0 == OtherMul1 && + cast<Instruction>(Op0)->hasUnsafeAlgebra()) { + // Matched: sqrt((x * x) * z) + RepeatOp = OtherMul0; + OtherOp = Op1; } } } - return Ret; -} + if (!RepeatOp) + return Ret; + // Fast math flags for any created instructions should match the sqrt + // and multiply. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(I->getFastMathFlags()); + + // If we found a repeated factor, hoist it out of the square root and + // replace it with the fabs of that factor. + Module *M = Callee->getParent(); + Type *ArgType = I->getType(); + Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); + Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); + if (OtherOp) { + // If we found a non-repeated factor, we still need to get its square + // root. We then multiply that by the value that was simplified out + // of the square root calculation. + Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); + Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); + return B.CreateFMul(FabsCall, SqrtCall); + } + return FabsCall; +} + +// TODO: Generalize to handle any trig function and its inverse. Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1471,13 +1474,15 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { !FT->getParamType(0)->isFloatingPointTy()) return Ret; - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) - return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); if (!OpC) return Ret; + // Both calls must allow unsafe optimizations in order to remove them. 
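// tan(atan(x)) == x only holds exactly in real arithmetic; in IEEE floating
// point the round trip can perturb the value, so the fold needs fast-math
// permission on both call sites.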
+ if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra()) + return Ret; + // tan(atan(x)) -> x // tanf(atanf(x)) -> x // tanl(atanl(x)) -> x diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 2e361d3..f47ddb9 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -222,8 +222,17 @@ static void resolveCycles(Metadata *MD, bool AllowTemps) { if (auto *N = dyn_cast_or_null<MDNode>(MD)) { if (AllowTemps && N->isTemporary()) return; - if (!N->isResolved()) - N->resolveCycles(AllowTemps); + if (!N->isResolved()) { + if (AllowTemps) + // Note that this will drop RAUW support on any temporaries, which + // blocks uniquing. If this ends up being an issue, in the future + // we can experiment with delaying resolving these nodes until + // after metadata is fully materialized (i.e. when linking metadata + // as a postpass after function importing). + N->resolveNonTemporaries(); + else + N->resolveCycles(); + } } } diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9ed44d1..27d3337 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -222,7 +222,7 @@ static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) { } } } - + /// \returns \p I after propagating metadata from \p VL. static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) { Instruction *I0 = cast<Instruction>(VL[0]); @@ -506,7 +506,7 @@ private: } return Last; } - + /// -- Vectorization State -- /// Holds all of the tree entries. std::vector<TreeEntry> VectorizableTree; @@ -884,7 +884,7 @@ private: /// The current size of the scheduling region. int ScheduleRegionSize; - + /// The maximum size allowed for the scheduling region. int ScheduleRegionSizeLimit; @@ -1089,7 +1089,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { newTreeEntry(VL, false); return; } - + // Check that every instructions appears once in this bundle. for (unsigned i = 0, e = VL.size(); i < e; ++i) for (unsigned j = i+1; j < e; ++j) @@ -1711,7 +1711,7 @@ int BoUpSLP::getSpillCost() { int Cost = 0; SmallPtrSet<Instruction*, 4> LiveValues; - Instruction *PrevInst = nullptr; + Instruction *PrevInst = nullptr; for (unsigned N = 0; N < VectorizableTree.size(); ++N) { Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]); @@ -1736,7 +1736,7 @@ int BoUpSLP::getSpillCost() { for (auto &J : PrevInst->operands()) { if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J)) LiveValues.insert(cast<Instruction>(&*J)); - } + } // Now find the sequence of instructions between PrevInst and Inst. 
BasicBlock::reverse_iterator InstIt(Inst->getIterator()), @@ -1780,30 +1780,29 @@ int BoUpSLP::getTreeCost() { unsigned BundleWidth = VectorizableTree[0].Scalars.size(); - for (unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) { - int C = getEntryCost(&VectorizableTree[i]); + for (TreeEntry &TE : VectorizableTree) { + int C = getEntryCost(&TE); DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with " - << *VectorizableTree[i].Scalars[0] << " .\n"); + << *TE.Scalars[0] << " .\n"); Cost += C; } SmallSet<Value *, 16> ExtractCostCalculated; int ExtractCost = 0; - for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end(); - I != E; ++I) { + for (ExternalUser &EU : ExternalUses) { // We only add extract cost once for the same scalar. - if (!ExtractCostCalculated.insert(I->Scalar).second) + if (!ExtractCostCalculated.insert(EU.Scalar).second) continue; // Uses by ephemeral values are free (because the ephemeral value will be // removed prior to code generation, and so the extraction will be // removed as well). - if (EphValues.count(I->User)) + if (EphValues.count(EU.User)) continue; - VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth); + VectorType *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth); ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, - I->Lane); + EU.Lane); } Cost += getSpillCost(); @@ -2551,7 +2550,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *BoUpSLP::vectorizeTree() { - + // All blocks must be scheduled before any instructions are inserted. for (auto &BSIter : BlocksSchedules) { scheduleBlock(BSIter.second.get()); } @@ -3072,10 +3071,10 @@ void BoUpSLP::BlockScheduling::resetSchedule() { } void BoUpSLP::scheduleBlock(BlockScheduling *BS) { - + if (!BS->ScheduleStart) return; - + DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n"); BS->resetSchedule(); @@ -3590,7 +3589,7 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { /// \param NumEltsToRdx The number of elements that should be reduced in the /// vector. /// \param IsPairwise Whether the reduction is a pairwise or splitting -/// reduction. A pairwise reduction will generate a mask of +/// reduction. A pairwise reduction will generate a mask of /// <0,2,...> or <1,3,..> while a splitting reduction will generate /// <2,3, undef,undef> for a vector of 4 and NumElts = 2. /// \param IsLeft True will generate a mask of even elements, odd otherwise. @@ -3773,7 +3772,7 @@ public: IRBuilder<> Builder(ReductionRoot); FastMathFlags Unsafe; Unsafe.setUnsafeAlgebra(); - Builder.SetFastMathFlags(Unsafe); + Builder.setFastMathFlags(Unsafe); unsigned i = 0; for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { @@ -4018,9 +4017,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // Collect the incoming values from the PHIs.
Incoming.clear(); - for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie; - ++instr) { - PHINode *P = dyn_cast<PHINode>(instr); + for (Instruction &I : *BB) { + PHINode *P = dyn_cast<PHINode>(&I); if (!P) break; diff --git a/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h b/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h index b66009e..abf9294 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h +++ b/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h @@ -131,6 +131,7 @@ class ASTContext : public RefCountedBase<ASTContext> { mutable llvm::FoldingSet<AutoType> AutoTypes; mutable llvm::FoldingSet<AtomicType> AtomicTypes; llvm::FoldingSet<AttributedType> AttributedTypes; + mutable llvm::FoldingSet<PipeType> PipeTypes; mutable llvm::FoldingSet<QualifiedTemplateName> QualifiedTemplateNames; mutable llvm::FoldingSet<DependentTemplateName> DependentTemplateNames; @@ -1079,6 +1080,9 @@ public: /// blocks. QualType getBlockDescriptorType() const; + /// \brief Return pipe type for the specified type. + QualType getPipeType(QualType T) const; + /// Gets the struct used to keep track of the extended descriptor for /// pointer to blocks. QualType getBlockDescriptorExtendedType() const; @@ -2279,9 +2283,13 @@ public: /// \brief Make an APSInt of the appropriate width and signedness for the /// given \p Value and integer \p Type. llvm::APSInt MakeIntValue(uint64_t Value, QualType Type) const { - llvm::APSInt Res(getIntWidth(Type), - !Type->isSignedIntegerOrEnumerationType()); + // If Type is a signed integer type larger than 64 bits, we need to be sure + // to sign extend Res appropriately. + llvm::APSInt Res(64, !Type->isSignedIntegerOrEnumerationType()); Res = Value; + unsigned Width = getIntWidth(Type); + if (Width != Res.getBitWidth()) + return Res.extOrTrunc(Width); return Res; } diff --git a/contrib/llvm/tools/clang/include/clang/AST/ASTMutationListener.h b/contrib/llvm/tools/clang/include/clang/AST/ASTMutationListener.h index 3ff392d..cf3b55d 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/ASTMutationListener.h +++ b/contrib/llvm/tools/clang/include/clang/AST/ASTMutationListener.h @@ -29,6 +29,7 @@ namespace clang { class ObjCContainerDecl; class ObjCInterfaceDecl; class ObjCPropertyDecl; + class ParmVarDecl; class QualType; class RecordDecl; class TagDecl; @@ -88,6 +89,9 @@ public: /// \brief A function template's definition was instantiated. virtual void FunctionDefinitionInstantiated(const FunctionDecl *D) {} + /// \brief A default argument was instantiated. + virtual void DefaultArgumentInstantiated(const ParmVarDecl *D) {} + /// \brief A new objc category class was added for an interface. 
virtual void AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) {} diff --git a/contrib/llvm/tools/clang/include/clang/AST/BuiltinTypes.def b/contrib/llvm/tools/clang/include/clang/AST/BuiltinTypes.def index 85e237a..a08a683 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/BuiltinTypes.def +++ b/contrib/llvm/tools/clang/include/clang/AST/BuiltinTypes.def @@ -1,4 +1,4 @@ -//===-- BuiltinTypeNodes.def - Metadata about BuiltinTypes ------*- C++ -*-===// +//===-- BuiltinTypes.def - Metadata about BuiltinTypes ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/tools/clang/include/clang/AST/Decl.h b/contrib/llvm/tools/clang/include/clang/AST/Decl.h index 046ce70..029c118 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Decl.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Decl.h @@ -2440,10 +2440,9 @@ class IndirectFieldDecl : public ValueDecl, NamedDecl **Chaining; unsigned ChainingSize; - IndirectFieldDecl(DeclContext *DC, SourceLocation L, + IndirectFieldDecl(ASTContext &C, DeclContext *DC, SourceLocation L, DeclarationName N, QualType T, - NamedDecl **CH, unsigned CHS) - : ValueDecl(IndirectField, DC, L, N, T), Chaining(CH), ChainingSize(CHS) {} + NamedDecl **CH, unsigned CHS); public: static IndirectFieldDecl *Create(ASTContext &C, DeclContext *DC, diff --git a/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h b/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h index 05b2a12..2d6e84a 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h +++ b/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h @@ -113,6 +113,9 @@ public: /// Tags, declared with 'struct foo;' and referenced with /// 'struct foo'. All tags are also types. This is what /// elaborated-type-specifiers look for in C. + /// This also contains names that conflict with tags in the + /// same scope but that are otherwise ordinary names (non-type + /// template parameters and indirect field declarations). IDNS_Tag = 0x0002, /// Types, declared with 'struct foo', typedefs, etc. @@ -131,7 +134,7 @@ public: IDNS_Namespace = 0x0010, /// Ordinary names. In C, everything that's not a label, tag, - /// or member ends up here. + /// member, or function-local extern ends up here. IDNS_Ordinary = 0x0020, /// Objective C \@protocol. @@ -160,7 +163,9 @@ public: /// This declaration is a function-local extern declaration of a /// variable or function. This may also be IDNS_Ordinary if it - /// has been declared outside any function. + /// has been declared outside any function. These act mostly like + /// invisible friend declarations, but are also visible to unqualified + /// lookup within the scope of the declaring function. 
IDNS_LocalExtern = 0x0800 }; diff --git a/contrib/llvm/tools/clang/include/clang/AST/Expr.h b/contrib/llvm/tools/clang/include/clang/AST/Expr.h index 095dd6a..38733ee 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Expr.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Expr.h @@ -1292,6 +1292,7 @@ public: enum CharacterKind { Ascii, Wide, + UTF8, UTF16, UTF32 }; diff --git a/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h b/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h index 0608aba..6821274 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h +++ b/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h @@ -951,15 +951,9 @@ public: /// This wraps up a function call argument that was created from the /// corresponding parameter's default argument, when the call did not /// explicitly supply arguments for all of the parameters. -class CXXDefaultArgExpr final - : public Expr, - private llvm::TrailingObjects<CXXDefaultArgExpr, Expr *> { +class CXXDefaultArgExpr final : public Expr { /// \brief The parameter whose default is being used. - /// - /// When the bit is set, the subexpression is stored after the - /// CXXDefaultArgExpr itself. When the bit is clear, the parameter's - /// actual default expression is the subexpression. - llvm::PointerIntPair<ParmVarDecl *, 1, bool> Param; + ParmVarDecl *Param; /// \brief The location where the default argument expression was used. SourceLocation Loc; @@ -971,16 +965,7 @@ class CXXDefaultArgExpr final : param->getDefaultArg()->getType(), param->getDefaultArg()->getValueKind(), param->getDefaultArg()->getObjectKind(), false, false, false, false), - Param(param, false), Loc(Loc) { } - - CXXDefaultArgExpr(StmtClass SC, SourceLocation Loc, ParmVarDecl *param, - Expr *SubExpr) - : Expr(SC, SubExpr->getType(), - SubExpr->getValueKind(), SubExpr->getObjectKind(), - false, false, false, false), - Param(param, true), Loc(Loc) { - *getTrailingObjects<Expr *>() = SubExpr; - } + Param(param), Loc(Loc) { } public: CXXDefaultArgExpr(EmptyShell Empty) : Expr(CXXDefaultArgExprClass, Empty) {} @@ -992,24 +977,15 @@ public: return new (C) CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param); } - // \p Param is the parameter whose default argument is used by this - // expression, and \p SubExpr is the expression that will actually be used. - static CXXDefaultArgExpr *Create(const ASTContext &C, SourceLocation Loc, - ParmVarDecl *Param, Expr *SubExpr); - // Retrieve the parameter that the argument was created from. - const ParmVarDecl *getParam() const { return Param.getPointer(); } - ParmVarDecl *getParam() { return Param.getPointer(); } + const ParmVarDecl *getParam() const { return Param; } + ParmVarDecl *getParam() { return Param; } // Retrieve the actual argument to the function call. 
const Expr *getExpr() const { - if (Param.getInt()) - return *getTrailingObjects<Expr *>(); return getParam()->getDefaultArg(); } Expr *getExpr() { - if (Param.getInt()) - return *getTrailingObjects<Expr *>(); return getParam()->getDefaultArg(); } @@ -1033,7 +1009,6 @@ public: return child_range(child_iterator(), child_iterator()); } - friend TrailingObjects; friend class ASTStmtReader; friend class ASTStmtWriter; }; diff --git a/contrib/llvm/tools/clang/include/clang/AST/OperationKinds.h b/contrib/llvm/tools/clang/include/clang/AST/OperationKinds.h index 2235c10..102bbc2 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/OperationKinds.h +++ b/contrib/llvm/tools/clang/include/clang/AST/OperationKinds.h @@ -185,7 +185,11 @@ enum CastKind { /// CK_FloatingToBoolean - Floating point to boolean. /// (bool) f CK_FloatingToBoolean, - + + /// CK_BooleanToSignedIntegral - Convert a boolean to -1 or 0 for true and + /// false, respectively. + CK_BooleanToSignedIntegral, + /// CK_FloatingCast - Casting between floating types of different size. /// (double) f /// (float) ld diff --git a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h index e6f7583..0c25a45 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h @@ -978,6 +978,8 @@ DEF_TRAVERSE_TYPE(ObjCObjectPointerType, DEF_TRAVERSE_TYPE(AtomicType, { TRY_TO(TraverseType(T->getValueType())); }) +DEF_TRAVERSE_TYPE(PipeType, { TRY_TO(TraverseType(T->getElementType())); }) + #undef DEF_TRAVERSE_TYPE // ----------------- TypeLoc traversal ----------------- @@ -1206,6 +1208,8 @@ DEF_TRAVERSE_TYPELOC(ObjCObjectPointerType, DEF_TRAVERSE_TYPELOC(AtomicType, { TRY_TO(TraverseTypeLoc(TL.getValueLoc())); }) +DEF_TRAVERSE_TYPELOC(PipeType, { TRY_TO(TraverseTypeLoc(TL.getValueLoc())); }) + #undef DEF_TRAVERSE_TYPELOC // ----------------- Decl traversal ----------------- diff --git a/contrib/llvm/tools/clang/include/clang/AST/Stmt.h b/contrib/llvm/tools/clang/include/clang/AST/Stmt.h index e48b7dc..d3950e9 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Stmt.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Stmt.h @@ -130,7 +130,7 @@ protected: friend class CharacterLiteral; unsigned : NumExprBits; - unsigned Kind : 2; + unsigned Kind : 3; }; enum APFloatSemantics { diff --git a/contrib/llvm/tools/clang/include/clang/AST/Type.h b/contrib/llvm/tools/clang/include/clang/AST/Type.h index 0c08130..d63b2c4 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Type.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Type.h @@ -1721,6 +1721,7 @@ public: bool isNDRangeT() const; // OpenCL ndrange_t bool isReserveIDT() const; // OpenCL reserve_id_t + bool isPipeType() const; // OpenCL pipe type bool isOpenCLSpecificType() const; // Any OpenCL specific type /// Determines if this type, which must satisfy @@ -5015,6 +5016,41 @@ class AtomicType : public Type, public llvm::FoldingSetNode { } }; +/// PipeType - OpenCL 2.0 pipe type. +class PipeType : public Type, public llvm::FoldingSetNode { + QualType ElementType; + + PipeType(QualType elemType, QualType CanonicalPtr) : + Type(Pipe, CanonicalPtr, elemType->isDependentType(), + elemType->isInstantiationDependentType(), + elemType->isVariablyModifiedType(), + elemType->containsUnexpandedParameterPack()), + ElementType(elemType) {} + friend class ASTContext; // ASTContext creates these.
+ +public: + + QualType getElementType() const { return ElementType; } + + bool isSugared() const { return false; } + + QualType desugar() const { return QualType(this, 0); } + + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getElementType()); + } + + static void Profile(llvm::FoldingSetNodeID &ID, QualType T) { + ID.AddPointer(T.getAsOpaquePtr()); + } + + + static bool classof(const Type *T) { + return T->getTypeClass() == Pipe; + } + +}; + /// A qualifier set is used to build a set of qualifiers. class QualifierCollector : public Qualifiers { public: @@ -5461,9 +5497,13 @@ inline bool Type::isImageType() const { isImage1dBufferT(); } +inline bool Type::isPipeType() const { + return isa<PipeType>(CanonicalType); +} + inline bool Type::isOpenCLSpecificType() const { return isSamplerT() || isEventT() || isImageType() || isClkEventT() || - isQueueT() || isNDRangeT() || isReserveIDT(); + isQueueT() || isNDRangeT() || isReserveIDT() || isPipeType(); } inline bool Type::isTemplateTypeParmType() const { diff --git a/contrib/llvm/tools/clang/include/clang/AST/TypeLoc.h b/contrib/llvm/tools/clang/include/clang/AST/TypeLoc.h index 26feda5..29035a4 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/TypeLoc.h +++ b/contrib/llvm/tools/clang/include/clang/AST/TypeLoc.h @@ -2033,7 +2033,26 @@ public: } }; +struct PipeTypeLocInfo { + SourceLocation KWLoc; +}; + +class PipeTypeLoc : public ConcreteTypeLoc<UnqualTypeLoc, PipeTypeLoc, PipeType, + PipeTypeLocInfo> { +public: + TypeLoc getValueLoc() const { return this->getInnerTypeLoc(); } + + SourceRange getLocalSourceRange() const { return SourceRange(getKWLoc()); } + + SourceLocation getKWLoc() const { return this->getLocalData()->KWLoc; } + void setKWLoc(SourceLocation Loc) { this->getLocalData()->KWLoc = Loc; } + void initializeLocal(ASTContext &Context, SourceLocation Loc) { + setKWLoc(Loc); + } + + QualType getInnerType() const { return this->getTypePtr()->getElementType(); } +}; } #endif diff --git a/contrib/llvm/tools/clang/include/clang/AST/TypeNodes.def b/contrib/llvm/tools/clang/include/clang/AST/TypeNodes.def index 2549f0b..8caf102 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/TypeNodes.def +++ b/contrib/llvm/tools/clang/include/clang/AST/TypeNodes.def @@ -104,6 +104,7 @@ NON_CANONICAL_UNLESS_DEPENDENT_TYPE(PackExpansion, Type) TYPE(ObjCObject, Type) TYPE(ObjCInterface, ObjCObjectType) TYPE(ObjCObjectPointer, Type) +TYPE(Pipe, Type) TYPE(Atomic, Type) #ifdef LAST_TYPE diff --git a/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersInternal.h b/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersInternal.h index d499091..1d1d795 100644 --- a/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersInternal.h +++ b/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersInternal.h @@ -560,10 +560,10 @@ bool matchesFirstInPointerRange(const MatcherT &Matcher, IteratorT Start, // Metafunction to determine if type T has a member called // getDecl. -#if defined(_MSC_VER) && (_MSC_VER < 1900) && !defined(__clang__) -// For old versions of MSVC, we use a weird nonstandard __if_exists -// statement, since before MSVC2015, it was not standards-conformant -// enough to compile the usual code below. +#if defined(_MSC_VER) && !defined(__clang__) +// For MSVC, we use a weird nonstandard __if_exists statement, as it +// is not standards-conformant enough to properly compile the standard +// code below. 
(At least up through MSVC 2015, this workaround is required.) template <typename T> struct has_getDecl { __if_exists(T::getDecl) { enum { value = 1 }; diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td index ce270bf..b04498f 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -22,6 +22,7 @@ def err_drv_unknown_stdin_type_clang_cl : Error< def err_drv_unknown_language : Error<"language not recognized: '%0'">; def err_drv_invalid_arch_name : Error< "invalid arch name '%0'">; +def err_drv_cuda_bad_gpu_arch : Error<"Unsupported CUDA gpu architecture: %0">; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td index 8e5f57d..2e4e57b 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td @@ -616,6 +616,7 @@ def Most : DiagGroup<"most", [ CharSubscript, Comment, DeleteNonVirtualDtor, + ForLoopAnalysis, Format, Implicit, InfiniteRecursion, diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h index a675dfa..312b71f 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h @@ -29,7 +29,7 @@ namespace clang { enum { DIAG_START_COMMON = 0, DIAG_START_DRIVER = DIAG_START_COMMON + 300, - DIAG_START_FRONTEND = DIAG_START_DRIVER + 100, + DIAG_START_FRONTEND = DIAG_START_DRIVER + 200, DIAG_START_SERIALIZATION = DIAG_START_FRONTEND + 100, DIAG_START_LEX = DIAG_START_SERIALIZATION + 120, DIAG_START_PARSE = DIAG_START_LEX + 300, diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td index ed6ff20..2fc9664 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -490,6 +490,8 @@ def warn_pragma_diagnostic_unknown_warning : // - #pragma __debug def warn_pragma_debug_unexpected_command : Warning< "unexpected debug command '%0'">, InGroup<IgnoredPragmas>; +def warn_pragma_debug_missing_argument : Warning< + "missing argument to debug command '%0'">, InGroup<IgnoredPragmas>; def err_defined_macro_name : Error<"'defined' cannot be used as a macro name">; def err_paste_at_start : Error< diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 59f5095..6ba482c 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1,4 +1,3 @@ - //==--- DiagnosticSemaKinds.td - libsema diagnostics ----------------------===// // // The LLVM Compiler Infrastructure // @@ -7643,6 +7642,10 @@ def err_wrong_sampler_addressspace: Error< "sampler type cannot be used with the __local and __global address space qualifiers">; def err_opencl_global_invalid_addr_space : Error< "program scope variable must reside in %0 address space">; +def 
err_missing_actual_pipe_type : Error< + "missing actual type specifier for pipe">; +def err_reference_pipe_type : Error< + "pipe packet types cannot be of reference type">; def err_opencl_no_main : Error<"%select{function|kernel}0 cannot be called 'main'">; def err_opencl_kernel_attr : Error<"attribute %0 can only be applied to a kernel function">; diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h b/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h index 1d59d64..e284171 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h @@ -36,6 +36,11 @@ namespace clang { TSS_unsigned }; + enum TypeSpecifiersPipe { + TSP_unspecified, + TSP_pipe + }; + /// \brief Specifies the kind of type. enum TypeSpecifierType { TST_unspecified, diff --git a/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def b/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def index 9252d99..0269451 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def @@ -519,6 +519,8 @@ KEYWORD(vec_step , KEYOPENCL|KEYALTIVEC|KEYZVECTOR) // OpenMP Type Traits KEYWORD(__builtin_omp_required_simd_align, KEYALL) +KEYWORD(pipe , KEYOPENCL) + // Borland Extensions. KEYWORD(__pascal , KEYALL) @@ -697,6 +699,11 @@ ANNOTATION(pragma_parser_crash) // handles them. ANNOTATION(pragma_captured) +// Annotation for #pragma clang __debug dump... +// The lexer produces these so that the parser and semantic analysis can +// look up and dump the operand. +ANNOTATION(pragma_dump) + // Annotation for #pragma ms_struct... // The lexer produces these so that they only take effect when the parser // handles them. diff --git a/contrib/llvm/tools/clang/include/clang/Basic/VirtualFileSystem.h b/contrib/llvm/tools/clang/include/clang/Basic/VirtualFileSystem.h index 1df4947..bab88c9 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/VirtualFileSystem.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/VirtualFileSystem.h @@ -299,10 +299,7 @@ public: llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { return WorkingDirectory; } - std::error_code setCurrentWorkingDirectory(const Twine &Path) override { - WorkingDirectory = Path.str(); - return std::error_code(); - } + std::error_code setCurrentWorkingDirectory(const Twine &Path) override; }; /// \brief Get a globally unique ID for a virtual file or directory.
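Taken together, the pipe changes add one new node to the type system, a pipe keyword gated on KEYOPENCL, and the two diagnostics above. A minimal sketch of how the new AST hooks compose (makeIntPipe is an illustrative helper, not part of the patch; assumes the relevant clang AST headers, <cassert>, and an ASTContext from the caller):

  // Sketch: build and inspect the AST type for 'pipe int' using the new
  // getPipeType factory, isPipeType predicate, and PipeType accessor.
  clang::QualType makeIntPipe(clang::ASTContext &Ctx) {
    clang::QualType PipeTy = Ctx.getPipeType(Ctx.IntTy);
    assert(PipeTy->isPipeType());
    assert(PipeTy->getAs<clang::PipeType>()->getElementType() == Ctx.IntTy);
    return PipeTy;
  }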
diff --git a/contrib/llvm/tools/clang/include/clang/CodeGen/BackendUtil.h b/contrib/llvm/tools/clang/include/clang/CodeGen/BackendUtil.h index ba5dc39..d375a78 100644 --- a/contrib/llvm/tools/clang/include/clang/CodeGen/BackendUtil.h +++ b/contrib/llvm/tools/clang/include/clang/CodeGen/BackendUtil.h @@ -33,12 +33,10 @@ namespace clang { Backend_EmitObj ///< Emit native object files }; - void - EmitBackendOutput(DiagnosticsEngine &Diags, const CodeGenOptions &CGOpts, - const TargetOptions &TOpts, const LangOptions &LOpts, - StringRef TDesc, llvm::Module *M, BackendAction Action, - raw_pwrite_stream *OS, - std::unique_ptr<llvm::FunctionInfoIndex> Index = nullptr); + void EmitBackendOutput(DiagnosticsEngine &Diags, const CodeGenOptions &CGOpts, + const TargetOptions &TOpts, const LangOptions &LOpts, + StringRef TDesc, llvm::Module *M, BackendAction Action, + raw_pwrite_stream *OS); } #endif diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Action.h b/contrib/llvm/tools/clang/include/clang/Driver/Action.h index fc31d4b..c5b0f47 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Action.h +++ b/contrib/llvm/tools/clang/include/clang/Driver/Action.h @@ -15,6 +15,9 @@ #include "llvm/ADT/SmallVector.h" namespace llvm { + +class StringRef; + namespace opt { class Arg; } @@ -32,6 +35,9 @@ namespace driver { /// single primary output, at least in terms of controlling the /// compilation. Actions can produce auxiliary files, but can only /// produce a single output to feed into subsequent actions. +/// +/// Actions are usually owned by a Compilation, which creates new +/// actions via MakeAction(). class Action { public: typedef ActionList::size_type size_type; @@ -70,27 +76,20 @@ private: ActionList Inputs; - unsigned OwnsInputs : 1; - protected: - Action(ActionClass Kind, types::ID Type) - : Kind(Kind), Type(Type), OwnsInputs(true) {} - Action(ActionClass Kind, std::unique_ptr<Action> Input, types::ID Type) - : Kind(Kind), Type(Type), Inputs(1, Input.release()), OwnsInputs(true) { - } - Action(ActionClass Kind, std::unique_ptr<Action> Input) - : Kind(Kind), Type(Input->getType()), Inputs(1, Input.release()), - OwnsInputs(true) {} + Action(ActionClass Kind, types::ID Type) : Action(Kind, ActionList(), Type) {} + Action(ActionClass Kind, Action *Input, types::ID Type) + : Action(Kind, ActionList({Input}), Type) {} + Action(ActionClass Kind, Action *Input) + : Action(Kind, ActionList({Input}), Input->getType()) {} Action(ActionClass Kind, const ActionList &Inputs, types::ID Type) - : Kind(Kind), Type(Type), Inputs(Inputs), OwnsInputs(true) {} + : Kind(Kind), Type(Type), Inputs(Inputs) {} + public: virtual ~Action(); const char *getClassName() const { return Action::getClassName(getKind()); } - bool getOwnsInputs() { return OwnsInputs; } - void setOwnsInputs(bool Value) { OwnsInputs = Value; } - ActionClass getKind() const { return Kind; } types::ID getType() const { return Type; } @@ -126,7 +125,7 @@ class BindArchAction : public Action { const char *ArchName; public: - BindArchAction(std::unique_ptr<Action> Input, const char *ArchName); + BindArchAction(Action *Input, const char *ArchName); const char *getArchName() const { return ArchName; } @@ -137,19 +136,24 @@ public: class CudaDeviceAction : public Action { virtual void anchor(); - /// GPU architecture to bind -- e.g 'sm_35'. + /// GPU architecture to bind. Always of the form /sm_\d+/. 
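/// (e.g. "sm_35", for which getComputeArchName() below yields the matching
/// "compute_35" virtual architecture name)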
const char *GpuArchName; /// True when action results are not consumed by the host action (e.g when /// -fsyntax-only or --cuda-device-only options are used). bool AtTopLevel; public: - CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName, - bool AtTopLevel); + CudaDeviceAction(Action *Input, const char *ArchName, bool AtTopLevel); const char *getGpuArchName() const { return GpuArchName; } + + /// Gets the compute_XX that corresponds to getGpuArchName(). + const char *getComputeArchName() const; + bool isAtTopLevel() const { return AtTopLevel; } + static bool IsValidGpuArchName(llvm::StringRef ArchName); + static bool classof(const Action *A) { return A->getKind() == CudaDeviceClass; } @@ -160,9 +164,7 @@ class CudaHostAction : public Action { ActionList DeviceActions; public: - CudaHostAction(std::unique_ptr<Action> Input, - const ActionList &DeviceActions); - ~CudaHostAction() override; + CudaHostAction(Action *Input, const ActionList &DeviceActions); const ActionList &getDeviceActions() const { return DeviceActions; } @@ -172,7 +174,7 @@ public: class JobAction : public Action { virtual void anchor(); protected: - JobAction(ActionClass Kind, std::unique_ptr<Action> Input, types::ID Type); + JobAction(ActionClass Kind, Action *Input, types::ID Type); JobAction(ActionClass Kind, const ActionList &Inputs, types::ID Type); public: @@ -185,7 +187,7 @@ public: class PreprocessJobAction : public JobAction { void anchor() override; public: - PreprocessJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + PreprocessJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == PreprocessJobClass; @@ -195,7 +197,7 @@ public: class PrecompileJobAction : public JobAction { void anchor() override; public: - PrecompileJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + PrecompileJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == PrecompileJobClass; @@ -205,7 +207,7 @@ public: class AnalyzeJobAction : public JobAction { void anchor() override; public: - AnalyzeJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + AnalyzeJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == AnalyzeJobClass; @@ -215,7 +217,7 @@ public: class MigrateJobAction : public JobAction { void anchor() override; public: - MigrateJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + MigrateJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == MigrateJobClass; @@ -225,7 +227,7 @@ public: class CompileJobAction : public JobAction { void anchor() override; public: - CompileJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + CompileJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == CompileJobClass; @@ -235,7 +237,7 @@ public: class BackendJobAction : public JobAction { void anchor() override; public: - BackendJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + BackendJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return A->getKind() == BackendJobClass; @@ -245,7 +247,7 @@ public: class AssembleJobAction : public JobAction { void anchor() override; public: - AssembleJobAction(std::unique_ptr<Action> Input, types::ID OutputType); + AssembleJobAction(Action *Input, types::ID OutputType); static bool classof(const Action *A) { return 
A->getKind() == AssembleJobClass; @@ -285,8 +287,7 @@ public: class VerifyJobAction : public JobAction { void anchor() override; public: - VerifyJobAction(ActionClass Kind, std::unique_ptr<Action> Input, - types::ID Type); + VerifyJobAction(ActionClass Kind, Action *Input, types::ID Type); static bool classof(const Action *A) { return A->getKind() == VerifyDebugInfoJobClass || A->getKind() == VerifyPCHJobClass; @@ -296,7 +297,7 @@ public: class VerifyDebugInfoJobAction : public VerifyJobAction { void anchor() override; public: - VerifyDebugInfoJobAction(std::unique_ptr<Action> Input, types::ID Type); + VerifyDebugInfoJobAction(Action *Input, types::ID Type); static bool classof(const Action *A) { return A->getKind() == VerifyDebugInfoJobClass; } @@ -305,7 +306,7 @@ public: class VerifyPCHJobAction : public VerifyJobAction { void anchor() override; public: - VerifyPCHJobAction(std::unique_ptr<Action> Input, types::ID Type); + VerifyPCHJobAction(Action *Input, types::ID Type); static bool classof(const Action *A) { return A->getKind() == VerifyPCHJobClass; } diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Compilation.h b/contrib/llvm/tools/clang/include/clang/Driver/Compilation.h index 12ff068..3ed1913 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Compilation.h +++ b/contrib/llvm/tools/clang/include/clang/Driver/Compilation.h @@ -48,7 +48,12 @@ class Compilation { /// own argument translation. llvm::opt::DerivedArgList *TranslatedArgs; - /// The list of actions. + /// The list of actions we've created via MakeAction. This is not accessible + /// to consumers; it's here just to manage ownership. + std::vector<std::unique_ptr<Action>> AllActions; + + /// The list of actions. This is maintained and modified by consumers, via + /// getActions(). ActionList Actions; /// The root list of jobs. @@ -105,6 +110,15 @@ public: ActionList &getActions() { return Actions; } const ActionList &getActions() const { return Actions; } + /// Creates a new Action owned by this Compilation. + /// + /// The new Action is *not* added to the list returned by getActions(). + template <typename T, typename... Args> T *MakeAction(Args &&... Arg) { + T *RawPtr = new T(std::forward<Args>(Arg)...); + AllActions.push_back(std::unique_ptr<Action>(RawPtr)); + return RawPtr; + } + JobList &getJobs() { return Jobs; } const JobList &getJobs() const { return Jobs; } diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Driver.h b/contrib/llvm/tools/clang/include/clang/Driver/Driver.h index c9940ba..a229779 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Driver.h +++ b/contrib/llvm/tools/clang/include/clang/Driver/Driver.h @@ -18,10 +18,10 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/Support/Path.h" // FIXME: Kill when CompilationInfo -#include <memory> - // lands. +#include "llvm/Support/Path.h" // FIXME: Kill when CompilationInfo lands. + #include <list> +#include <memory> #include <set> #include <string> @@ -375,20 +375,16 @@ public: /// ConstructAction - Construct the appropriate action to do for /// \p Phase on the \p Input, taking into account arguments /// like -fsyntax-only or --analyze.
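The ownership story behind these signature changes is in the Compilation.h hunk above: actions now live in Compilation::AllActions, created through MakeAction<>, so driver code can pass bare Action* around. A small sketch of the resulting usage (wireCompile and the bitcode output type are illustrative, not from the patch):

  // Sketch: create a compile step owned by the Compilation; MakeAction
  // registers ownership only, so top-level actions still have to be
  // pushed onto getActions() explicitly.
  clang::driver::Action *wireCompile(clang::driver::Compilation &C,
                                     clang::driver::Action *Input) {
    auto *Job = C.MakeAction<clang::driver::CompileJobAction>(
        Input, clang::driver::types::TY_LLVM_BC);
    C.getActions().push_back(Job);
    return Job;
  }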
- std::unique_ptr<Action> - ConstructPhaseAction(const ToolChain &TC, const llvm::opt::ArgList &Args, - phases::ID Phase, std::unique_ptr<Action> Input) const; + Action *ConstructPhaseAction(Compilation &C, const ToolChain &TC, + const llvm::opt::ArgList &Args, phases::ID Phase, + Action *Input) const; /// BuildJobsForAction - Construct the jobs to perform for the - /// action \p A. - void BuildJobsForAction(Compilation &C, - const Action *A, - const ToolChain *TC, - const char *BoundArch, - bool AtTopLevel, - bool MultipleArchs, - const char *LinkingOutput, - InputInfo &Result) const; + /// action \p A and return an InputInfo for the result of running \p A. + InputInfo BuildJobsForAction(Compilation &C, const Action *A, + const ToolChain *TC, const char *BoundArch, + bool AtTopLevel, bool MultipleArchs, + const char *LinkingOutput) const; /// Returns the default name for linked images (e.g., "a.out"). const char *getDefaultImageName() const; diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Options.td b/contrib/llvm/tools/clang/include/clang/Driver/Options.td index e219a9b..e4279e8 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/Options.td @@ -1906,13 +1906,13 @@ def _ : Joined<["--"], "">, Flags<[Unsupported]>; def mieee_rnd_near : Flag<["-"], "mieee-rnd-near">, Group<m_hexagon_Features_Group>; def mv4 : Flag<["-"], "mv4">, Group<m_hexagon_Features_Group>, - Alias<mcpu_EQ>, AliasArgs<["v4"]>; + Alias<mcpu_EQ>, AliasArgs<["hexagonv4"]>; def mv5 : Flag<["-"], "mv5">, Group<m_hexagon_Features_Group>, Alias<mcpu_EQ>, - AliasArgs<["v5"]>; + AliasArgs<["hexagonv5"]>; def mv55 : Flag<["-"], "mv55">, Group<m_hexagon_Features_Group>, - Alias<mcpu_EQ>, AliasArgs<["v55"]>; + Alias<mcpu_EQ>, AliasArgs<["hexagonv55"]>; def mv60 : Flag<["-"], "mv60">, Group<m_hexagon_Features_Group>, - Alias<mcpu_EQ>, AliasArgs<["v60"]>; + Alias<mcpu_EQ>, AliasArgs<["hexagonv60"]>; def mhexagon_hvx : Flag<["-"], "mhvx">, Group<m_hexagon_Features_Group>, Flags<[CC1Option]>, HelpText<"Enable Hexagon Vector eXtensions">; def mno_hexagon_hvx : Flag<["-"], "mno-hvx">, Group<m_hexagon_Features_Group>, diff --git a/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h b/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h index ed73107..7e68d0a 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h +++ b/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h @@ -134,7 +134,7 @@ public: StringRef getOS() const { return Triple.getOSName(); } /// \brief Provide the default architecture name (as expected by -arch) for - /// this toolchain. Note t + /// this toolchain. 
StringRef getDefaultUniversalArchName() const; std::string getTripleString() const { diff --git a/contrib/llvm/tools/clang/include/clang/Lex/LiteralSupport.h b/contrib/llvm/tools/clang/include/clang/Lex/LiteralSupport.h index 5210e3f..d568614 100644 --- a/contrib/llvm/tools/clang/include/clang/Lex/LiteralSupport.h +++ b/contrib/llvm/tools/clang/include/clang/Lex/LiteralSupport.h @@ -166,6 +166,7 @@ public: bool hadError() const { return HadError; } bool isAscii() const { return Kind == tok::char_constant; } bool isWide() const { return Kind == tok::wide_char_constant; } + bool isUTF8() const { return Kind == tok::utf8_char_constant; } bool isUTF16() const { return Kind == tok::utf16_char_constant; } bool isUTF32() const { return Kind == tok::utf32_char_constant; } bool isMultiChar() const { return IsMultiChar; } diff --git a/contrib/llvm/tools/clang/include/clang/Parse/Parser.h b/contrib/llvm/tools/clang/include/clang/Parse/Parser.h index 82b7798..00885a5 100644 --- a/contrib/llvm/tools/clang/include/clang/Parse/Parser.h +++ b/contrib/llvm/tools/clang/include/clang/Parse/Parser.h @@ -502,6 +502,10 @@ private: void HandlePragmaAlign(); /// \brief Handle the annotation token produced for + /// #pragma clang __debug dump... + void HandlePragmaDump(); + + /// \brief Handle the annotation token produced for /// #pragma weak id... void HandlePragmaWeak(); @@ -1640,13 +1644,22 @@ private: /// A SmallVector of types. typedef SmallVector<ParsedType, 12> TypeVector; - StmtResult ParseStatement(SourceLocation *TrailingElseLoc = nullptr); + StmtResult ParseStatement(SourceLocation *TrailingElseLoc = nullptr, + bool AllowOpenMPStandalone = false); + enum AllowedContsructsKind { + /// \brief Allow any declarations, statements, OpenMP directives. + ACK_Any, + /// \brief Allow only statements and non-standalone OpenMP directives. + ACK_StatementsOpenMPNonStandalone, + /// \brief Allow statements and all executable OpenMP directives. + ACK_StatementsOpenMPAnyExecutable + }; StmtResult - ParseStatementOrDeclaration(StmtVector &Stmts, bool OnlyStatement, + ParseStatementOrDeclaration(StmtVector &Stmts, AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc = nullptr); StmtResult ParseStatementOrDeclarationAfterAttributes( StmtVector &Stmts, - bool OnlyStatement, + AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc, ParsedAttributesWithRange &Attrs); StmtResult ParseExprStatement(); @@ -1674,7 +1687,8 @@ private: StmtResult ParseReturnStatement(); StmtResult ParseAsmStatement(bool &msAsm); StmtResult ParseMicrosoftAsmStatement(SourceLocation AsmLoc); - StmtResult ParsePragmaLoopHint(StmtVector &Stmts, bool OnlyStatement, + StmtResult ParsePragmaLoopHint(StmtVector &Stmts, + AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc, ParsedAttributesWithRange &Attrs); @@ -2439,11 +2453,13 @@ private: bool AllowScopeSpecifier); /// \brief Parses declarative or executable directive. /// - /// \param StandAloneAllowed true if allowed stand-alone directives, - /// false - otherwise + /// \param Allowed ACK_Any if any directives are allowed, + /// ACK_StatementsOpenMPAnyExecutable if any executable directives are + /// allowed, ACK_StatementsOpenMPNonStandalone if only non-standalone + /// executable directives are allowed. /// StmtResult - ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed); + ParseOpenMPDeclarativeOrExecutableDirective(AllowedContsructsKind Allowed); /// \brief Parses clause of kind \a CKind for directive of a kind \a Kind.
/// /// \param DKind Kind of current directive. diff --git a/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h b/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h index e9fdb70..064d37b 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h @@ -337,6 +337,7 @@ private: unsigned TypeAltiVecPixel : 1; unsigned TypeAltiVecBool : 1; unsigned TypeSpecOwned : 1; + unsigned TypeSpecPipe : 1; // type-qualifiers unsigned TypeQualifiers : 4; // Bitwise OR of TQ. @@ -385,6 +386,7 @@ private: SourceLocation FS_inlineLoc, FS_virtualLoc, FS_explicitLoc, FS_noreturnLoc; SourceLocation FS_forceinlineLoc; SourceLocation FriendLoc, ModulePrivateLoc, ConstexprLoc, ConceptLoc; + SourceLocation TQ_pipeLoc; WrittenBuiltinSpecs writtenBS; void SaveWrittenBuiltinSpecs(); @@ -420,6 +422,7 @@ public: TypeAltiVecPixel(false), TypeAltiVecBool(false), TypeSpecOwned(false), + TypeSpecPipe(false), TypeQualifiers(TQ_unspecified), FS_inline_specified(false), FS_forceinline_specified(false), @@ -473,6 +476,7 @@ public: bool isTypeAltiVecBool() const { return TypeAltiVecBool; } bool isTypeSpecOwned() const { return TypeSpecOwned; } bool isTypeRep() const { return isTypeRep((TST) TypeSpecType); } + bool isTypeSpecPipe() const { return TypeSpecPipe; } ParsedType getRepAsType() const { assert(isTypeRep((TST) TypeSpecType) && "DeclSpec does not store a type"); @@ -532,6 +536,7 @@ public: SourceLocation getRestrictSpecLoc() const { return TQ_restrictLoc; } SourceLocation getVolatileSpecLoc() const { return TQ_volatileLoc; } SourceLocation getAtomicSpecLoc() const { return TQ_atomicLoc; } + SourceLocation getPipeLoc() const { return TQ_pipeLoc; } /// \brief Clear out all of the type qualifiers. void ClearTypeQualifiers() { @@ -540,6 +545,7 @@ public: TQ_restrictLoc = SourceLocation(); TQ_volatileLoc = SourceLocation(); TQ_atomicLoc = SourceLocation(); + TQ_pipeLoc = SourceLocation(); } // function-specifier @@ -643,6 +649,9 @@ public: bool SetTypeAltiVecBool(bool isAltiVecBool, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID, const PrintingPolicy &Policy); + bool SetTypePipe(bool isPipe, SourceLocation Loc, + const char *&PrevSpec, unsigned &DiagID, + const PrintingPolicy &Policy); bool SetTypeSpecError(); void UpdateDeclRep(Decl *Rep) { assert(isDeclRep((TST) TypeSpecType)); @@ -1081,7 +1090,7 @@ typedef SmallVector<Token, 4> CachedTokens; /// This is intended to be a small value object. struct DeclaratorChunk { enum { - Pointer, Reference, Array, Function, BlockPointer, MemberPointer, Paren + Pointer, Reference, Array, Function, BlockPointer, MemberPointer, Paren, Pipe } Kind; /// Loc - The place where this type was defined. @@ -1409,6 +1418,13 @@ struct DeclaratorChunk { } }; + struct PipeTypeInfo : TypeInfoCommon { + /// The access writes. + unsigned AccessWrites : 3; + + void destroy() {} + }; + union { TypeInfoCommon Common; PointerTypeInfo Ptr; @@ -1417,6 +1433,7 @@ struct DeclaratorChunk { FunctionTypeInfo Fun; BlockPointerTypeInfo Cls; MemberPointerTypeInfo Mem; + PipeTypeInfo PipeInfo; }; void destroy() { @@ -1428,6 +1445,7 @@ struct DeclaratorChunk { case DeclaratorChunk::Array: return Arr.destroy(); case DeclaratorChunk::MemberPointer: return Mem.destroy(); case DeclaratorChunk::Paren: return; + case DeclaratorChunk::Pipe: return PipeInfo.destroy(); } } @@ -1526,6 +1544,17 @@ struct DeclaratorChunk { return I; } + /// \brief Return a DeclaratorChunk for a pipe.
+ static DeclaratorChunk getPipe(unsigned TypeQuals, + SourceLocation Loc) { + DeclaratorChunk I; + I.Kind = Pipe; + I.Loc = Loc; + I.Cls.TypeQuals = TypeQuals; + I.Cls.AttrList = 0; + return I; + } + static DeclaratorChunk getMemberPointer(const CXXScopeSpec &SS, unsigned TypeQuals, SourceLocation Loc) { @@ -2026,6 +2055,7 @@ public: case DeclaratorChunk::Array: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: return false; } llvm_unreachable("Invalid type chunk"); diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h b/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h index 87c40f0..7efb19f 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h @@ -515,6 +515,7 @@ public: configure(); } + void dump(); void print(raw_ostream &); /// Suppress the diagnostics that would normally fire because of this diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Overload.h b/contrib/llvm/tools/clang/include/clang/Sema/Overload.h index 20958b0..6243795 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/Overload.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/Overload.h @@ -570,8 +570,8 @@ namespace clang { /// This conversion candidate is not viable because its result /// type is not implicitly convertible to the desired type. ovl_fail_bad_final_conversion, - - /// This conversion function template specialization candidate is not + + /// This conversion function template specialization candidate is not /// viable because the final conversion was not an exact match. ovl_fail_final_conversion_not_exact, @@ -582,7 +582,10 @@ namespace clang { /// This candidate function was not viable because an enable_if /// attribute disabled it. - ovl_fail_enable_if + ovl_fail_enable_if, + + /// This candidate was not viable because its address could not be taken. + ovl_fail_addr_not_available }; /// OverloadCandidate - A single candidate in an overload set (C++ 13.3). 
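The Parser.h hunks above replace the old bool StandAloneAllowed with the three-valued AllowedContsructsKind: stand-alone OpenMP directives such as #pragma omp barrier own no associated statement, so they may not appear where exactly one statement is required (for instance as the unbraced body of an if). A minimal sketch of the gate this enables, using assumed stand-in types rather than clang's real parser interfaces:

    // Sketch only; names are stand-ins, not clang's Parser API.
    enum AllowedConstructsKind {
      ACK_Any,                           // declarations, statements, any directive
      ACK_StatementsOpenMPNonStandalone, // e.g. the unbraced body of an if
      ACK_StatementsOpenMPAnyExecutable  // e.g. a statement in a compound block
    };

    // Assumed toy predicate: directives with no associated statement,
    // such as "#pragma omp barrier" or "#pragma omp taskwait".
    bool isStandaloneDirective(int DKind) { return DKind == 0; }

    // Stand-alone directives are rejected in positions that require a real
    // statement, which is what the new parameter lets callers express.
    bool directiveAllowedHere(int DKind, AllowedConstructsKind Allowed) {
      return !(Allowed == ACK_StatementsOpenMPNonStandalone &&
               isStandaloneDirective(DKind));
    }

The DeclSpec.h hunks above are the parser-side plumbing for the OpenCL 2.0 pipe specifier (as in void foo(read_only pipe int p)). Per the Itanium rule quoted later in this diff, every such parameter mangles to the single token 8ocl_pipe, so a toy mangler suffices to predict the symbol; the function below is illustrative only:

    #include <iostream>
    #include <string>

    // Toy restatement of "<type> ::= 8ocl_pipe": the element type and access
    // qualifier do not participate in the mangled name under this rule alone.
    std::string mangleFunction(const std::string &Name,
                               const std::string &ParamTypes) {
      return "_Z" + std::to_string(Name.size()) + Name + ParamTypes;
    }

    int main() {
      std::cout << mangleFunction("foo", "8ocl_pipe") << "\n"; // _Z3foo8ocl_pipe
    }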
diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h index 77d06f2..ffe1ff3 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h @@ -1269,6 +1269,8 @@ public: SourceLocation Loc, DeclarationName Entity); QualType BuildParenType(QualType T); QualType BuildAtomicType(QualType T, SourceLocation Loc); + QualType BuildPipeType(QualType T, + SourceLocation Loc); TypeSourceInfo *GetTypeForDeclarator(Declarator &D, Scope *S); TypeSourceInfo *GetTypeForDeclaratorCast(Declarator &D, QualType FromTy); @@ -2548,7 +2550,8 @@ public: MultiExprArg Args, SourceLocation RParenLoc, Expr *ExecConfig, - bool AllowTypoCorrection=true); + bool AllowTypoCorrection=true, + bool CalleesAddressIsTaken=false); bool buildOverloadedCallSet(Scope *S, Expr *Fn, UnresolvedLookupExpr *ULE, MultiExprArg Args, SourceLocation RParenLoc, @@ -7626,6 +7629,9 @@ public: void ActOnPragmaMSInitSeg(SourceLocation PragmaLocation, StringLiteral *SegmentName); + /// \brief Called on #pragma clang __debug dump II + void ActOnPragmaDump(Scope *S, SourceLocation Loc, IdentifierInfo *II); + /// ActOnPragmaDetectMismatch - Call on well-formed \#pragma detect_mismatch void ActOnPragmaDetectMismatch(StringRef Name, StringRef Value); @@ -8583,6 +8589,10 @@ public: bool CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty, CastKind &Kind); + /// \brief Prepare `SplattedExpr` for a vector splat operation, adding + /// implicit casts if necessary. + ExprResult prepareVectorSplat(QualType VectorTy, Expr *SplattedExpr); + // CheckExtVectorCast - check type constraints for extended vectors. // Since vectors are an extension, there are no C standard reference for this. // We allow casting between vectors and integer datatypes of the same size, diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h b/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h index 16bda6e..0dfb8cf 100644 --- a/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h +++ b/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h @@ -907,7 +907,9 @@ namespace clang { /// \brief A DecayedType record. TYPE_DECAYED = 41, /// \brief An AdjustedType record. - TYPE_ADJUSTED = 42 + TYPE_ADJUSTED = 42, + /// \brief A PipeType record. 
+ TYPE_PIPE = 43 }; /// \brief The type IDs for special types constructed by semantic diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h b/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h index ed34547..ef8c653 100644 --- a/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h +++ b/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h @@ -871,6 +871,7 @@ public: const FunctionDecl *Delete) override; void CompletedImplicitDefinition(const FunctionDecl *D) override; void StaticDataMemberInstantiated(const VarDecl *D) override; + void DefaultArgumentInstantiated(const ParmVarDecl *D) override; void FunctionDefinitionInstantiated(const FunctionDecl *D) override; void AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) override; diff --git a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h index bb835c4..43f6e5c 100644 --- a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -80,7 +80,7 @@ class MemRegion : public llvm::FoldingSetNode { public: enum Kind { // Memory spaces. - GenericMemSpaceRegionKind, + CodeSpaceRegionKind, StackLocalsSpaceRegionKind, StackArgumentsSpaceRegionKind, HeapSpaceRegionKind, @@ -89,29 +89,29 @@ public: GlobalInternalSpaceRegionKind, GlobalSystemSpaceRegionKind, GlobalImmutableSpaceRegionKind, - BEG_NON_STATIC_GLOBAL_MEMSPACES = GlobalInternalSpaceRegionKind, + BEGIN_NON_STATIC_GLOBAL_MEMSPACES = GlobalInternalSpaceRegionKind, END_NON_STATIC_GLOBAL_MEMSPACES = GlobalImmutableSpaceRegionKind, - BEG_GLOBAL_MEMSPACES = StaticGlobalSpaceRegionKind, + BEGIN_GLOBAL_MEMSPACES = StaticGlobalSpaceRegionKind, END_GLOBAL_MEMSPACES = GlobalImmutableSpaceRegionKind, - BEG_MEMSPACES = GenericMemSpaceRegionKind, + BEGIN_MEMSPACES = CodeSpaceRegionKind, END_MEMSPACES = GlobalImmutableSpaceRegionKind, // Untyped regions. SymbolicRegionKind, AllocaRegionKind, // Typed regions. - BEG_TYPED_REGIONS, - FunctionTextRegionKind = BEG_TYPED_REGIONS, - BlockTextRegionKind, + BEGIN_TYPED_REGIONS, + FunctionCodeRegionKind = BEGIN_TYPED_REGIONS, + BlockCodeRegionKind, BlockDataRegionKind, - BEG_TYPED_VALUE_REGIONS, - CompoundLiteralRegionKind = BEG_TYPED_VALUE_REGIONS, + BEGIN_TYPED_VALUE_REGIONS, + CompoundLiteralRegionKind = BEGIN_TYPED_VALUE_REGIONS, CXXThisRegionKind, StringRegionKind, ObjCStringRegionKind, ElementRegionKind, // Decl Regions. - BEG_DECL_REGIONS, - VarRegionKind = BEG_DECL_REGIONS, + BEGIN_DECL_REGIONS, + VarRegionKind = BEGIN_DECL_REGIONS, FieldRegionKind, ObjCIvarRegionKind, END_DECL_REGIONS = ObjCIvarRegionKind, @@ -193,12 +193,9 @@ public: /// for example, the set of global variables, the stack frame, etc. class MemSpaceRegion : public MemRegion { protected: - friend class MemRegionManager; - MemRegionManager *Mgr; - MemSpaceRegion(MemRegionManager *mgr, Kind k = GenericMemSpaceRegionKind) - : MemRegion(k), Mgr(mgr) { + MemSpaceRegion(MemRegionManager *mgr, Kind k) : MemRegion(k), Mgr(mgr) { assert(classof(this)); } @@ -211,10 +208,26 @@ public: static bool classof(const MemRegion *R) { Kind k = R->getKind(); - return k >= BEG_MEMSPACES && k <= END_MEMSPACES; + return k >= BEGIN_MEMSPACES && k <= END_MEMSPACES; } }; - + +/// CodeSpaceRegion - The memory space that holds the executable code of +/// functions and blocks. 
+class CodeSpaceRegion : public MemSpaceRegion { + friend class MemRegionManager; + + CodeSpaceRegion(MemRegionManager *mgr) + : MemSpaceRegion(mgr, CodeSpaceRegionKind) {} + +public: + void dumpToStream(raw_ostream &os) const override; + + static bool classof(const MemRegion *R) { + return R->getKind() == CodeSpaceRegionKind; + } +}; + class GlobalsSpaceRegion : public MemSpaceRegion { virtual void anchor(); protected: @@ -223,7 +236,7 @@ protected: public: static bool classof(const MemRegion *R) { Kind k = R->getKind(); - return k >= BEG_GLOBAL_MEMSPACES && k <= END_GLOBAL_MEMSPACES; + return k >= BEGIN_GLOBAL_MEMSPACES && k <= END_GLOBAL_MEMSPACES; } }; @@ -259,17 +272,15 @@ public: /// RegionStoreManager::invalidateRegions (instead of finding all the dependent /// globals, we invalidate the whole parent region). class NonStaticGlobalSpaceRegion : public GlobalsSpaceRegion { - friend class MemRegionManager; - protected: NonStaticGlobalSpaceRegion(MemRegionManager *mgr, Kind k) : GlobalsSpaceRegion(mgr, k) {} - + public: static bool classof(const MemRegion *R) { Kind k = R->getKind(); - return k >= BEG_NON_STATIC_GLOBAL_MEMSPACES && + return k >= BEGIN_NON_STATIC_GLOBAL_MEMSPACES && k <= END_NON_STATIC_GLOBAL_MEMSPACES; } }; @@ -357,7 +368,7 @@ public: return R->getKind() == UnknownSpaceRegionKind; } }; - + class StackSpaceRegion : public MemSpaceRegion { private: const StackFrameContext *SFC; @@ -368,18 +379,18 @@ protected: assert(classof(this)); } -public: +public: const StackFrameContext *getStackFrame() const { return SFC; } - + void Profile(llvm::FoldingSetNodeID &ID) const override; static bool classof(const MemRegion *R) { Kind k = R->getKind(); return k >= StackLocalsSpaceRegionKind && k <= StackArgumentsSpaceRegionKind; - } + } }; - + class StackLocalsSpaceRegion : public StackSpaceRegion { virtual void anchor(); friend class MemRegionManager; @@ -491,7 +502,7 @@ public: static bool classof(const MemRegion* R) { unsigned k = R->getKind(); - return k >= BEG_TYPED_REGIONS && k <= END_TYPED_REGIONS; + return k >= BEGIN_TYPED_REGIONS && k <= END_TYPED_REGIONS; } }; @@ -523,7 +534,7 @@ public: static bool classof(const MemRegion* R) { unsigned k = R->getKind(); - return k >= BEG_TYPED_VALUE_REGIONS && k <= END_TYPED_VALUE_REGIONS; + return k >= BEGIN_TYPED_VALUE_REGIONS && k <= END_TYPED_VALUE_REGIONS; } }; @@ -538,16 +549,16 @@ public: static bool classof(const MemRegion* R) { Kind k = R->getKind(); - return k >= FunctionTextRegionKind && k <= BlockTextRegionKind; + return k >= FunctionCodeRegionKind && k <= BlockCodeRegionKind; } }; -/// FunctionTextRegion - A region that represents code texts of function. -class FunctionTextRegion : public CodeTextRegion { +/// FunctionCodeRegion - A region that represents code texts of function. +class FunctionCodeRegion : public CodeTextRegion { const NamedDecl *FD; public: - FunctionTextRegion(const NamedDecl *fd, const MemRegion* sreg) - : CodeTextRegion(sreg, FunctionTextRegionKind), FD(fd) { + FunctionCodeRegion(const NamedDecl *fd, const MemRegion* sreg) + : CodeTextRegion(sreg, FunctionCodeRegionKind), FD(fd) { assert(isa<ObjCMethodDecl>(fd) || isa<FunctionDecl>(fd)); } @@ -577,27 +588,27 @@ public: const MemRegion*); static bool classof(const MemRegion* R) { - return R->getKind() == FunctionTextRegionKind; + return R->getKind() == FunctionCodeRegionKind; } }; -/// BlockTextRegion - A region that represents code texts of blocks (closures). -/// Blocks are represented with two kinds of regions. 
BlockTextRegions +/// BlockCodeRegion - A region that represents code texts of blocks (closures). +/// Blocks are represented with two kinds of regions. BlockCodeRegions /// represent the "code", while BlockDataRegions represent instances of blocks, /// which correspond to "code+data". The distinction is important, because /// like a closure a block captures the values of externally referenced /// variables. -class BlockTextRegion : public CodeTextRegion { +class BlockCodeRegion : public CodeTextRegion { friend class MemRegionManager; const BlockDecl *BD; AnalysisDeclContext *AC; CanQualType locTy; - BlockTextRegion(const BlockDecl *bd, CanQualType lTy, + BlockCodeRegion(const BlockDecl *bd, CanQualType lTy, AnalysisDeclContext *ac, const MemRegion* sreg) - : CodeTextRegion(sreg, BlockTextRegionKind), BD(bd), AC(ac), locTy(lTy) {} + : CodeTextRegion(sreg, BlockCodeRegionKind), BD(bd), AC(ac), locTy(lTy) {} public: QualType getLocationType() const override { @@ -619,32 +630,32 @@ public: const MemRegion*); static bool classof(const MemRegion* R) { - return R->getKind() == BlockTextRegionKind; + return R->getKind() == BlockCodeRegionKind; } }; /// BlockDataRegion - A region that represents a block instance. -/// Blocks are represented with two kinds of regions. BlockTextRegions +/// Blocks are represented with two kinds of regions. BlockCodeRegions /// represent the "code", while BlockDataRegions represent instances of blocks, /// which correspond to "code+data". The distinction is important, because /// like a closure a block captures the values of externally referenced /// variables. class BlockDataRegion : public TypedRegion { friend class MemRegionManager; - const BlockTextRegion *BC; + const BlockCodeRegion *BC; const LocationContext *LC; // Can be null */ unsigned BlockCount; void *ReferencedVars; void *OriginalVars; - BlockDataRegion(const BlockTextRegion *bc, const LocationContext *lc, + BlockDataRegion(const BlockCodeRegion *bc, const LocationContext *lc, unsigned count, const MemRegion *sreg) : TypedRegion(sreg, BlockDataRegionKind), BC(bc), LC(lc), BlockCount(count), ReferencedVars(nullptr), OriginalVars(nullptr) {} public: - const BlockTextRegion *getCodeRegion() const { return BC; } + const BlockCodeRegion *getCodeRegion() const { return BC; } const BlockDecl *getDecl() const { return BC->getDecl(); } @@ -691,7 +702,7 @@ public: void Profile(llvm::FoldingSetNodeID& ID) const override; - static void ProfileRegion(llvm::FoldingSetNodeID&, const BlockTextRegion *, + static void ProfileRegion(llvm::FoldingSetNodeID&, const BlockCodeRegion *, const LocationContext *, unsigned, const MemRegion *); @@ -856,7 +867,7 @@ public: static bool classof(const MemRegion* R) { unsigned k = R->getKind(); - return k >= BEG_DECL_REGIONS && k <= END_DECL_REGIONS; + return k >= BEGIN_DECL_REGIONS && k <= END_DECL_REGIONS; } }; @@ -1138,7 +1149,7 @@ class MemRegionManager { HeapSpaceRegion *heap; UnknownSpaceRegion *unknown; - MemSpaceRegion *code; + CodeSpaceRegion *code; public: MemRegionManager(ASTContext &c, llvm::BumpPtrAllocator &a) @@ -1174,9 +1185,9 @@ public: /// getUnknownRegion - Retrieve the memory region associated with unknown /// memory space. - const MemSpaceRegion *getUnknownRegion(); + const UnknownSpaceRegion *getUnknownRegion(); - const MemSpaceRegion *getCodeRegion(); + const CodeSpaceRegion *getCodeRegion(); /// getAllocaRegion - Retrieve a region associated with a call to alloca(). 
const AllocaRegion *getAllocaRegion(const Expr *Ex, unsigned Cnt, @@ -1262,8 +1273,8 @@ public: baseReg->isVirtual()); } - const FunctionTextRegion *getFunctionTextRegion(const NamedDecl *FD); - const BlockTextRegion *getBlockTextRegion(const BlockDecl *BD, + const FunctionCodeRegion *getFunctionCodeRegion(const NamedDecl *FD); + const BlockCodeRegion *getBlockCodeRegion(const BlockDecl *BD, CanQualType locTy, AnalysisDeclContext *AC); @@ -1271,7 +1282,7 @@ public: /// of a block. Unlike many other MemRegions, the LocationContext* /// argument is allowed to be NULL for cases where we have no known /// context. - const BlockDataRegion *getBlockDataRegion(const BlockTextRegion *bc, + const BlockDataRegion *getBlockDataRegion(const BlockCodeRegion *bc, const LocationContext *lc, unsigned blockCount); diff --git a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h index a68d341..3c47114 100644 --- a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h +++ b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h @@ -83,7 +83,11 @@ public: } SVal evalCast(SVal val, QualType castTy, QualType originalType); - + + // Handles casts of type CK_IntegralCast. + SVal evalIntegralCast(ProgramStateRef state, SVal val, QualType castTy, + QualType originalType); + virtual SVal evalMinus(NonLoc val) = 0; virtual SVal evalComplement(NonLoc val) = 0; diff --git a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 642e11a..d644254 100644 --- a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -45,8 +45,8 @@ class SVal { public: enum BaseKind { // The enumerators must be representable using 2 bits. 
- UndefinedKind = 0, // for subclass UndefinedVal (an uninitialized value) - UnknownKind = 1, // for subclass UnknownVal (a void value) + UndefinedValKind = 0, // for subclass UndefinedVal (an uninitialized value) + UnknownValKind = 1, // for subclass UnknownVal (a void value) LocKind = 2, // for subclass Loc (an L-value) NonLocKind = 3 // for subclass NonLoc (an R-value that's not // an L-value) @@ -115,19 +115,19 @@ public: } inline bool isUnknown() const { - return getRawKind() == UnknownKind; + return getRawKind() == UnknownValKind; } inline bool isUndef() const { - return getRawKind() == UndefinedKind; + return getRawKind() == UndefinedValKind; } inline bool isUnknownOrUndef() const { - return getRawKind() <= UnknownKind; + return getRawKind() <= UnknownValKind; } inline bool isValid() const { - return getRawKind() > UnknownKind; + return getRawKind() > UnknownValKind; } bool isConstant() const; @@ -190,12 +190,12 @@ public: class UndefinedVal : public SVal { public: - UndefinedVal() : SVal(UndefinedKind) {} + UndefinedVal() : SVal(UndefinedValKind) {} private: friend class SVal; static bool isKind(const SVal& V) { - return V.getBaseKind() == UndefinedKind; + return V.getBaseKind() == UndefinedValKind; } }; @@ -223,12 +223,12 @@ private: class UnknownVal : public DefinedOrUnknownSVal { public: - explicit UnknownVal() : DefinedOrUnknownSVal(UnknownKind) {} + explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {} private: friend class SVal; static bool isKind(const SVal &V) { - return V.getBaseKind() == UnknownKind; + return V.getBaseKind() == UnknownValKind; } }; @@ -465,7 +465,7 @@ private: namespace loc { -enum Kind { GotoLabelKind, MemRegionKind, ConcreteIntKind }; +enum Kind { GotoLabelKind, MemRegionValKind, ConcreteIntKind }; class GotoLabel : public Loc { public: @@ -490,7 +490,7 @@ private: class MemRegionVal : public Loc { public: - explicit MemRegionVal(const MemRegion* r) : Loc(MemRegionKind, r) {} + explicit MemRegionVal(const MemRegion* r) : Loc(MemRegionValKind, r) {} /// \brief Get the underlying region.
const MemRegion* getRegion() const { @@ -518,11 +518,11 @@ private: MemRegionVal() {} static bool isKind(const SVal& V) { return V.getBaseKind() == LocKind && - V.getSubKind() == MemRegionKind; + V.getSubKind() == MemRegionValKind; } static bool isKind(const Loc& V) { - return V.getSubKind() == MemRegionKind; + return V.getSubKind() == MemRegionValKind; } }; diff --git a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index 9dbfab2..77d12e5 100644 --- a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -42,14 +42,22 @@ namespace ento { class SymExpr : public llvm::FoldingSetNode { virtual void anchor(); public: - enum Kind { RegionValueKind, ConjuredKind, DerivedKind, ExtentKind, - MetadataKind, - BEGIN_SYMBOLS = RegionValueKind, - END_SYMBOLS = MetadataKind, - SymIntKind, IntSymKind, SymSymKind, - BEGIN_BINARYSYMEXPRS = SymIntKind, - END_BINARYSYMEXPRS = SymSymKind, - CastSymbolKind }; + enum Kind { + SymbolRegionValueKind, + SymbolConjuredKind, + SymbolDerivedKind, + SymbolExtentKind, + SymbolMetadataKind, + BEGIN_SYMBOLS = SymbolRegionValueKind, + END_SYMBOLS = SymbolMetadataKind, + SymIntExprKind, + IntSymExprKind, + SymSymExprKind, + BEGIN_BINARYSYMEXPRS = SymIntExprKind, + END_BINARYSYMEXPRS = SymSymExprKind, + SymbolCastKind + }; + private: Kind K; @@ -126,12 +134,12 @@ class SymbolRegionValue : public SymbolData { public: SymbolRegionValue(SymbolID sym, const TypedValueRegion *r) - : SymbolData(RegionValueKind, sym), R(r) {} + : SymbolData(SymbolRegionValueKind, sym), R(r) {} const TypedValueRegion* getRegion() const { return R; } static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) { - profile.AddInteger((unsigned) RegionValueKind); + profile.AddInteger((unsigned) SymbolRegionValueKind); profile.AddPointer(R); } @@ -145,7 +153,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == RegionValueKind; + return SE->getKind() == SymbolRegionValueKind; } }; @@ -160,11 +168,9 @@ class SymbolConjured : public SymbolData { public: SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx, - QualType t, unsigned count, - const void *symbolTag) - : SymbolData(ConjuredKind, sym), S(s), T(t), Count(count), - LCtx(lctx), - SymbolTag(symbolTag) {} + QualType t, unsigned count, const void *symbolTag) + : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count), + LCtx(lctx), SymbolTag(symbolTag) {} const Stmt *getStmt() const { return S; } unsigned getCount() const { return Count; } @@ -177,7 +183,7 @@ public: static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S, QualType T, unsigned Count, const LocationContext *LCtx, const void *SymbolTag) { - profile.AddInteger((unsigned) ConjuredKind); + profile.AddInteger((unsigned) SymbolConjuredKind); profile.AddPointer(S); profile.AddPointer(LCtx); profile.Add(T); @@ -191,7 +197,7 @@ public: // Implement isa<T> support. 
static inline bool classof(const SymExpr *SE) { - return SE->getKind() == ConjuredKind; + return SE->getKind() == SymbolConjuredKind; } }; @@ -203,7 +209,7 @@ class SymbolDerived : public SymbolData { public: SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r) - : SymbolData(DerivedKind, sym), parentSymbol(parent), R(r) {} + : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {} SymbolRef getParentSymbol() const { return parentSymbol; } const TypedValueRegion *getRegion() const { return R; } @@ -214,7 +220,7 @@ public: static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent, const TypedValueRegion *r) { - profile.AddInteger((unsigned) DerivedKind); + profile.AddInteger((unsigned) SymbolDerivedKind); profile.AddPointer(r); profile.AddPointer(parent); } @@ -225,7 +231,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == DerivedKind; + return SE->getKind() == SymbolDerivedKind; } }; @@ -237,7 +243,7 @@ class SymbolExtent : public SymbolData { public: SymbolExtent(SymbolID sym, const SubRegion *r) - : SymbolData(ExtentKind, sym), R(r) {} + : SymbolData(SymbolExtentKind, sym), R(r) {} const SubRegion *getRegion() const { return R; } @@ -246,7 +252,7 @@ public: void dumpToStream(raw_ostream &os) const override; static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) { - profile.AddInteger((unsigned) ExtentKind); + profile.AddInteger((unsigned) SymbolExtentKind); profile.AddPointer(R); } @@ -256,7 +262,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == ExtentKind; + return SE->getKind() == SymbolExtentKind; } }; @@ -273,7 +279,7 @@ class SymbolMetadata : public SymbolData { public: SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t, unsigned count, const void *tag) - : SymbolData(MetadataKind, sym), R(r), S(s), T(t), Count(count), Tag(tag) {} + : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), Count(count), Tag(tag) {} const MemRegion *getRegion() const { return R; } const Stmt *getStmt() const { return S; } @@ -287,7 +293,7 @@ public: static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R, const Stmt *S, QualType T, unsigned Count, const void *Tag) { - profile.AddInteger((unsigned) MetadataKind); + profile.AddInteger((unsigned) SymbolMetadataKind); profile.AddPointer(R); profile.AddPointer(S); profile.Add(T); @@ -301,7 +307,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == MetadataKind; + return SE->getKind() == SymbolMetadataKind; } }; @@ -315,7 +321,7 @@ class SymbolCast : public SymExpr { public: SymbolCast(const SymExpr *In, QualType From, QualType To) : - SymExpr(CastSymbolKind), Operand(In), FromTy(From), ToTy(To) { } + SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) { } QualType getType() const override { return ToTy; } @@ -325,7 +331,7 @@ public: static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *In, QualType From, QualType To) { - ID.AddInteger((unsigned) CastSymbolKind); + ID.AddInteger((unsigned) SymbolCastKind); ID.AddPointer(In); ID.Add(From); ID.Add(To); @@ -337,7 +343,7 @@ public: // Implement isa<T> support. 
static inline bool classof(const SymExpr *SE) { - return SE->getKind() == CastSymbolKind; + return SE->getKind() == SymbolCastKind; } }; @@ -372,7 +378,7 @@ class SymIntExpr : public BinarySymExpr { public: SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const llvm::APSInt& rhs, QualType t) - : BinarySymExpr(SymIntKind, op, t), LHS(lhs), RHS(rhs) {} + : BinarySymExpr(SymIntExprKind, op, t), LHS(lhs), RHS(rhs) {} void dumpToStream(raw_ostream &os) const override; @@ -382,7 +388,7 @@ public: static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs, BinaryOperator::Opcode op, const llvm::APSInt& rhs, QualType t) { - ID.AddInteger((unsigned) SymIntKind); + ID.AddInteger((unsigned) SymIntExprKind); ID.AddPointer(lhs); ID.AddInteger(op); ID.AddPointer(&rhs); @@ -395,7 +401,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == SymIntKind; + return SE->getKind() == SymIntExprKind; } }; @@ -407,7 +413,7 @@ class IntSymExpr : public BinarySymExpr { public: IntSymExpr(const llvm::APSInt& lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) - : BinarySymExpr(IntSymKind, op, t), LHS(lhs), RHS(rhs) {} + : BinarySymExpr(IntSymExprKind, op, t), LHS(lhs), RHS(rhs) {} void dumpToStream(raw_ostream &os) const override; @@ -417,7 +423,7 @@ public: static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) { - ID.AddInteger((unsigned) IntSymKind); + ID.AddInteger((unsigned) IntSymExprKind); ID.AddPointer(&lhs); ID.AddInteger(op); ID.AddPointer(rhs); @@ -430,7 +436,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == IntSymKind; + return SE->getKind() == IntSymExprKind; } }; @@ -442,7 +448,7 @@ class SymSymExpr : public BinarySymExpr { public: SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) - : BinarySymExpr(SymSymKind, op, t), LHS(lhs), RHS(rhs) {} + : BinarySymExpr(SymSymExprKind, op, t), LHS(lhs), RHS(rhs) {} const SymExpr *getLHS() const { return LHS; } const SymExpr *getRHS() const { return RHS; } @@ -451,7 +457,7 @@ public: static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) { - ID.AddInteger((unsigned) SymSymKind); + ID.AddInteger((unsigned) SymSymExprKind); ID.AddPointer(lhs); ID.AddInteger(op); ID.AddPointer(rhs); @@ -464,7 +470,7 @@ public: // Implement isa<T> support. static inline bool classof(const SymExpr *SE) { - return SE->getKind() == SymSymKind; + return SE->getKind() == SymSymExprKind; } }; diff --git a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp index d4abbe4..6438696 100644 --- a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp @@ -1836,6 +1836,13 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { Align = static_cast<unsigned>(Width); } } + break; + + case Type::Pipe: { + TypeInfo Info = getTypeInfo(cast<PipeType>(T)->getElementType()); + Width = Info.Width; + Align = Info.Align; + } } @@ -2663,6 +2670,7 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { case Type::FunctionProto: case Type::BlockPointer: case Type::MemberPointer: + case Type::Pipe: return type; // These types can be variably-modified. 
All these modifications @@ -3117,6 +3125,32 @@ ASTContext::getFunctionType(QualType ResultTy, ArrayRef<QualType> ArgArray, return QualType(FTP, 0); } +/// Return pipe type for the specified type. +QualType ASTContext::getPipeType(QualType T) const { + llvm::FoldingSetNodeID ID; + PipeType::Profile(ID, T); + + void *InsertPos = 0; + if (PipeType *PT = PipeTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(PT, 0); + + // If the pipe element type isn't canonical, this won't be a canonical type + // either, so fill in the canonical type field. + QualType Canonical; + if (!T.isCanonical()) { + Canonical = getPipeType(getCanonicalType(T)); + + // Get the new insert position for the node we care about. + PipeType *NewIP = PipeTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(!NewIP && "Shouldn't be in the map!"); + (void)NewIP; + } + PipeType *New = new (*this, TypeAlignment) PipeType(T, Canonical); + Types.push_back(New); + PipeTypes.InsertNode(New, InsertPos); + return QualType(New, 0); +} + #ifndef NDEBUG static bool NeedsInjectedClassNameType(const RecordDecl *D) { if (!isa<CXXRecordDecl>(D)) return false; @@ -5857,6 +5891,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string& S, case Type::Auto: return; + case Type::Pipe: #define ABSTRACT_TYPE(KIND, BASE) #define TYPE(KIND, BASE) #define DEPENDENT_TYPE(KIND, BASE) \ @@ -7792,6 +7827,24 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, return QualType(); } + case Type::Pipe: + { + // Merge two pipe types, while trying to preserve typedef info + QualType LHSValue = LHS->getAs<PipeType>()->getElementType(); + QualType RHSValue = RHS->getAs<PipeType>()->getElementType(); + if (Unqualified) { + LHSValue = LHSValue.getUnqualifiedType(); + RHSValue = RHSValue.getUnqualifiedType(); + } + QualType ResultType = mergeTypes(LHSValue, RHSValue, false, + Unqualified); + if (ResultType.isNull()) return QualType(); + if (getCanonicalType(LHSValue) == getCanonicalType(ResultType)) + return LHS; + if (getCanonicalType(RHSValue) == getCanonicalType(ResultType)) + return RHS; + return getPipeType(ResultType); + } } llvm_unreachable("Invalid Type::Class!"); diff --git a/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp b/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp index e7fee03..4622a75 100644 --- a/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp @@ -1055,6 +1055,7 @@ void ASTDumper::VisitTypedefDecl(const TypedefDecl *D) { dumpType(D->getUnderlyingType()); if (D->isModulePrivate()) OS << " __module_private__"; + dumpTypeAsChild(D->getUnderlyingType()); } void ASTDumper::VisitEnumDecl(const EnumDecl *D) { @@ -1226,6 +1227,7 @@ void ASTDumper::VisitNamespaceAliasDecl(const NamespaceAliasDecl *D) { void ASTDumper::VisitTypeAliasDecl(const TypeAliasDecl *D) { dumpName(D); dumpType(D->getUnderlyingType()); + dumpTypeAsChild(D->getUnderlyingType()); } void ASTDumper::VisitTypeAliasTemplateDecl(const TypeAliasTemplateDecl *D) { @@ -1419,6 +1421,8 @@ void ASTDumper::VisitUnresolvedUsingValueDecl(const UnresolvedUsingValueDecl *D) void ASTDumper::VisitUsingShadowDecl(const UsingShadowDecl *D) { OS << ' '; dumpBareDeclRef(D->getTargetDecl()); + if (auto *TD = dyn_cast<TypeDecl>(D->getUnderlyingDecl())) + dumpTypeAsChild(TD->getTypeForDecl()); } void ASTDumper::VisitLinkageSpecDecl(const LinkageSpecDecl *D) { diff --git a/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp b/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp index 359db1b..916f108 100644 ---
a/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp @@ -878,6 +878,14 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, break; } + case Type::Pipe: { + if (!IsStructurallyEquivalent(Context, + cast<PipeType>(T1)->getElementType(), + cast<PipeType>(T2)->getElementType())) + return false; + break; + } + } // end switch return true; diff --git a/contrib/llvm/tools/clang/lib/AST/Decl.cpp b/contrib/llvm/tools/clang/lib/AST/Decl.cpp index 42bebc5..427ca5e 100644 --- a/contrib/llvm/tools/clang/lib/AST/Decl.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Decl.cpp @@ -1184,7 +1184,7 @@ static LinkageInfo getLVForLocalDecl(const NamedDecl *D, return LinkageInfo::none(); const Decl *OuterD = getOutermostFuncOrBlockContext(D); - if (!OuterD) + if (!OuterD || OuterD->isInvalidDecl()) return LinkageInfo::none(); LinkageInfo LV; @@ -4024,16 +4024,26 @@ EnumConstantDecl::CreateDeserialized(ASTContext &C, unsigned ID) { void IndirectFieldDecl::anchor() { } +IndirectFieldDecl::IndirectFieldDecl(ASTContext &C, DeclContext *DC, + SourceLocation L, DeclarationName N, + QualType T, NamedDecl **CH, unsigned CHS) + : ValueDecl(IndirectField, DC, L, N, T), Chaining(CH), ChainingSize(CHS) { + // In C++, indirect field declarations conflict with tag declarations in the + // same scope, so add them to IDNS_Tag so that tag redeclaration finds them. + if (C.getLangOpts().CPlusPlus) + IdentifierNamespace |= IDNS_Tag; +} + IndirectFieldDecl * IndirectFieldDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L, IdentifierInfo *Id, QualType T, NamedDecl **CH, unsigned CHS) { - return new (C, DC) IndirectFieldDecl(DC, L, Id, T, CH, CHS); + return new (C, DC) IndirectFieldDecl(C, DC, L, Id, T, CH, CHS); } IndirectFieldDecl *IndirectFieldDecl::CreateDeserialized(ASTContext &C, unsigned ID) { - return new (C, ID) IndirectFieldDecl(nullptr, SourceLocation(), + return new (C, ID) IndirectFieldDecl(C, nullptr, SourceLocation(), DeclarationName(), QualType(), nullptr, 0); } diff --git a/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp b/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp index 16394e8..72587e3 100644 --- a/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp +++ b/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp @@ -569,7 +569,6 @@ unsigned Decl::getIdentifierNamespaceForKind(Kind DeclKind) { case Var: case ImplicitParam: case ParmVar: - case NonTypeTemplateParm: case ObjCMethod: case ObjCProperty: case MSProperty: @@ -579,6 +578,12 @@ unsigned Decl::getIdentifierNamespaceForKind(Kind DeclKind) { case IndirectField: return IDNS_Ordinary | IDNS_Member; + case NonTypeTemplateParm: + // Non-type template parameters are not found by lookups that ignore + // non-types, but they are found by redeclaration lookups for tag types, + // so we include them in the tag namespace. 
+ return IDNS_Ordinary | IDNS_Tag; + case ObjCCompatibleAlias: case ObjCInterface: return IDNS_Ordinary | IDNS_Type; diff --git a/contrib/llvm/tools/clang/lib/AST/Expr.cpp b/contrib/llvm/tools/clang/lib/AST/Expr.cpp index f9757b2..52f34df 100644 --- a/contrib/llvm/tools/clang/lib/AST/Expr.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Expr.cpp @@ -1553,6 +1553,7 @@ bool CastExpr::CastConsistency() const { case CK_ToVoid: case CK_VectorSplat: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToFloating: case CK_FloatingToIntegral: case CK_FloatingCast: @@ -1646,6 +1647,8 @@ const char *CastExpr::getCastKindName() const { return "VectorSplat"; case CK_IntegralCast: return "IntegralCast"; + case CK_BooleanToSignedIntegral: + return "BooleanToSignedIntegral"; case CK_IntegralToBoolean: return "IntegralToBoolean"; case CK_IntegralToFloating: diff --git a/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp b/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp index d35efcb..ea98334 100644 --- a/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp @@ -763,14 +763,6 @@ const IdentifierInfo *UserDefinedLiteral::getUDSuffix() const { return cast<FunctionDecl>(getCalleeDecl())->getLiteralIdentifier(); } -CXXDefaultArgExpr * -CXXDefaultArgExpr::Create(const ASTContext &C, SourceLocation Loc, - ParmVarDecl *Param, Expr *SubExpr) { - void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(1)); - return new (Mem) CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param, - SubExpr); -} - CXXDefaultInitExpr::CXXDefaultInitExpr(const ASTContext &C, SourceLocation Loc, FieldDecl *Field, QualType T) : Expr(CXXDefaultInitExprClass, T.getNonLValueExprType(C), diff --git a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp index c4c4398..fa652ba 100644 --- a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp @@ -7781,12 +7781,16 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_PointerToBoolean: case CK_IntegralToBoolean: case CK_FloatingToBoolean: + case CK_BooleanToSignedIntegral: case CK_FloatingComplexToBoolean: case CK_IntegralComplexToBoolean: { bool BoolResult; if (!EvaluateAsBooleanCondition(SubExpr, BoolResult, Info)) return false; - return Success(BoolResult, E); + uint64_t IntResult = BoolResult; + if (BoolResult && E->getCastKind() == CK_BooleanToSignedIntegral) + IntResult = (uint64_t)-1; + return Success(IntResult, E); } case CK_IntegralCast: { @@ -8223,6 +8227,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_ToVoid: case CK_VectorSplat: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: case CK_IntegralToFloating: case CK_FloatingToIntegral: diff --git a/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp b/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp index 8018188..3f6b682 100644 --- a/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp @@ -1509,6 +1509,7 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty, case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::Atomic: + case Type::Pipe: llvm_unreachable("type is illegal as a nested name specifier"); case Type::SubstTemplateTypeParmPack: @@ -2682,6 +2683,13 @@ void CXXNameMangler::mangleType(const AtomicType *T) { mangleType(T->getValueType()); } +void CXXNameMangler::mangleType(const PipeType *T) { + // Pipe type mangling rules are described 
in SPIR 2.0 specification + // A.1 Data types and A.3 Summary of changes + // <type> ::= 8ocl_pipe + Out << "8ocl_pipe"; +} + void CXXNameMangler::mangleIntegerLiteral(QualType T, const llvm::APSInt &Value) { // <expr-primary> ::= L <type> <value number> E # integer literal diff --git a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp index d45232b..4a45f9e 100644 --- a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp +++ b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp @@ -2428,6 +2428,15 @@ void MicrosoftCXXNameMangler::mangleType(const AtomicType *T, Qualifiers, mangleArtificalTagType(TTK_Struct, TemplateMangling, {"__clang"}); } +void MicrosoftCXXNameMangler::mangleType(const PipeType *T, Qualifiers, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this OpenCL pipe type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + void MicrosoftMangleContextImpl::mangleCXXName(const NamedDecl *D, raw_ostream &Out) { assert((isa<FunctionDecl>(D) || isa<VarDecl>(D)) && diff --git a/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp b/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp index bc3c2a8..bc5ae0f 100644 --- a/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp @@ -1552,7 +1552,8 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { FieldAlign = 1; // But, if there's an 'aligned' attribute on the field, honor that. - if (unsigned ExplicitFieldAlign = D->getMaxAlignment()) { + unsigned ExplicitFieldAlign = D->getMaxAlignment(); + if (ExplicitFieldAlign) { FieldAlign = std::max(FieldAlign, ExplicitFieldAlign); UnpackedFieldAlign = std::max(UnpackedFieldAlign, ExplicitFieldAlign); } @@ -1601,6 +1602,10 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { (AllowPadding && (FieldOffset & (FieldAlign-1)) + FieldSize > TypeSize)) { FieldOffset = llvm::RoundUpToAlignment(FieldOffset, FieldAlign); + } else if (ExplicitFieldAlign) { + // TODO: figure it out what needs to be done on targets that don't honor + // bit-field type alignment like ARM APCS ABI. + FieldOffset = llvm::RoundUpToAlignment(FieldOffset, ExplicitFieldAlign); } // Repeat the computation for diagnostic purposes. @@ -1609,6 +1614,9 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { (UnpackedFieldOffset & (UnpackedFieldAlign-1)) + FieldSize > TypeSize)) UnpackedFieldOffset = llvm::RoundUpToAlignment(UnpackedFieldOffset, UnpackedFieldAlign); + else if (ExplicitFieldAlign) + UnpackedFieldOffset = llvm::RoundUpToAlignment(UnpackedFieldOffset, + ExplicitFieldAlign); } // If we're using external layout, give the external layout a chance diff --git a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp index e55b2fc..69f52f5 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp @@ -1165,6 +1165,7 @@ void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) { switch (Node->getKind()) { case CharacterLiteral::Ascii: break; // no prefix. 
case CharacterLiteral::Wide: OS << 'L'; break; + case CharacterLiteral::UTF8: OS << "u8"; break; case CharacterLiteral::UTF16: OS << 'u'; break; case CharacterLiteral::UTF32: OS << 'U'; break; } diff --git a/contrib/llvm/tools/clang/lib/AST/Type.cpp b/contrib/llvm/tools/clang/lib/AST/Type.cpp index 7dd38cb..b467dac 100644 --- a/contrib/llvm/tools/clang/lib/AST/Type.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Type.cpp @@ -2614,7 +2614,7 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { case OCLQueue: return "queue_t"; case OCLNDRange: - return "event_t"; + return "ndrange_t"; case OCLReserveID: return "reserve_id_t"; case OMPArraySection: @@ -3361,6 +3361,8 @@ static CachedProperties computeCachedProperties(const Type *T) { return Cache::get(cast<ObjCObjectPointerType>(T)->getPointeeType()); case Type::Atomic: return Cache::get(cast<AtomicType>(T)->getValueType()); + case Type::Pipe: + return Cache::get(cast<PipeType>(T)->getElementType()); } llvm_unreachable("unhandled type class"); @@ -3443,6 +3445,8 @@ static LinkageInfo computeLinkageInfo(const Type *T) { return computeLinkageInfo(cast<ObjCObjectPointerType>(T)->getPointeeType()); case Type::Atomic: return computeLinkageInfo(cast<AtomicType>(T)->getValueType()); + case Type::Pipe: + return computeLinkageInfo(cast<PipeType>(T)->getElementType()); } llvm_unreachable("unhandled type class"); @@ -3601,6 +3605,7 @@ bool Type::canHaveNullability() const { case Type::ObjCObject: case Type::ObjCInterface: case Type::Atomic: + case Type::Pipe: return false; } llvm_unreachable("bad type kind!"); diff --git a/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp b/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp index 4617e1d..b202523 100644 --- a/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp @@ -193,6 +193,7 @@ bool TypePrinter::canPrefixQualifiers(const Type *T, case Type::ObjCObject: case Type::ObjCInterface: case Type::Atomic: + case Type::Pipe: CanPrefixQualifiers = true; break; @@ -859,6 +860,15 @@ void TypePrinter::printAtomicBefore(const AtomicType *T, raw_ostream &OS) { } void TypePrinter::printAtomicAfter(const AtomicType *T, raw_ostream &OS) { } +void TypePrinter::printPipeBefore(const PipeType *T, raw_ostream &OS) { + IncludeStrongLifetimeRAII Strong(Policy); + + OS << "pipe"; + spaceBeforePlaceHolder(OS); +} + +void TypePrinter::printPipeAfter(const PipeType *T, raw_ostream &OS) { +} /// Appends the given scope to the end of a string. void TypePrinter::AppendScope(DeclContext *DC, raw_ostream &OS) { if (DC->isTranslationUnit()) return; diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp index 9ce5257..1bc6c51 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp @@ -223,7 +223,24 @@ protected: public: DarwinTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) { - this->TLSSupported = Triple.isMacOSX() && !Triple.isMacOSXVersionLT(10, 7); + // By default, no TLS, and we whitelist permitted architecture/OS + // combinations. + this->TLSSupported = false; + + if (Triple.isMacOSX()) + this->TLSSupported = !Triple.isMacOSXVersionLT(10, 7); + else if (Triple.isiOS()) { + // 64-bit iOS supported it from 8 onwards, 32-bit from 9 onwards. 
+ if (Triple.getArch() == llvm::Triple::x86_64 || + Triple.getArch() == llvm::Triple::aarch64) + this->TLSSupported = !Triple.isOSVersionLT(8); + else if (Triple.getArch() == llvm::Triple::x86 || + Triple.getArch() == llvm::Triple::arm || + Triple.getArch() == llvm::Triple::thumb) + this->TLSSupported = !Triple.isOSVersionLT(9); + } else if (Triple.isWatchOS()) + this->TLSSupported = !Triple.isOSVersionLT(2); + this->MCountName = "\01mcount"; } @@ -7281,7 +7298,7 @@ public: explicit WebAssembly32TargetInfo(const llvm::Triple &T) : WebAssemblyTargetInfo(T) { MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32; - DataLayoutString = "e-p:32:32-i64:64-n32:64-S128"; + DataLayoutString = "e-m:e-p:32:32-i64:64-n32:64-S128"; } protected: @@ -7299,7 +7316,7 @@ public: LongAlign = LongWidth = 64; PointerAlign = PointerWidth = 64; MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; - DataLayoutString = "e-p:64:64-i64:64-n32:64-S128"; + DataLayoutString = "e-m:e-p:64:64-i64:64-n32:64-S128"; } protected: diff --git a/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp b/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp index cf5a8d6..6977f40 100644 --- a/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp @@ -658,6 +658,23 @@ directory_iterator InMemoryFileSystem::dir_begin(const Twine &Dir, EC = make_error_code(llvm::errc::not_a_directory); return directory_iterator(std::make_shared<InMemoryDirIterator>()); } + +std::error_code InMemoryFileSystem::setCurrentWorkingDirectory(const Twine &P) { + SmallString<128> Path; + P.toVector(Path); + + // Fix up relative paths. This just prepends the current working directory. + std::error_code EC = makeAbsolute(Path); + assert(!EC); + (void)EC; + + if (useNormalizedPaths()) + llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + + if (!Path.empty()) + WorkingDirectory = Path.str(); + return std::error_code(); +} } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp index 7032d00..6d746c2 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/FunctionIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/TargetRegistry.h" @@ -54,7 +55,6 @@ class EmitAssemblyHelper { const clang::TargetOptions &TargetOpts; const LangOptions &LangOpts; Module *TheModule; - std::unique_ptr<FunctionInfoIndex> FunctionIndex; Timer CodeGenerationTime; @@ -97,7 +97,7 @@ private: return PerFunctionPasses; } - void CreatePasses(); + void CreatePasses(FunctionInfoIndex *FunctionIndex); /// Generates the TargetMachine. /// Returns Null if it is unable to create the target machine. 
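The Darwin TLS whitelist in the Targets.cpp hunk above compresses into one predicate. The restatement below uses the same llvm::Triple queries as the hunk; it is a condensed sketch for readability, not the code the diff installs:

    #include "llvm/ADT/Triple.h"

    bool darwinSupportsTLS(const llvm::Triple &T) {
      if (T.isMacOSX())
        return !T.isMacOSXVersionLT(10, 7);    // macOS: 10.7 onwards
      if (T.isiOS()) {
        if (T.getArch() == llvm::Triple::x86_64 ||
            T.getArch() == llvm::Triple::aarch64)
          return !T.isOSVersionLT(8);          // 64-bit iOS: 8 onwards
        if (T.getArch() == llvm::Triple::x86 ||
            T.getArch() == llvm::Triple::arm ||
            T.getArch() == llvm::Triple::thumb)
          return !T.isOSVersionLT(9);          // 32-bit iOS: 9 onwards
        return false;
      }
      if (T.isWatchOS())
        return !T.isOSVersionLT(2);            // watchOS: 2 onwards
      return false;                            // everything else: no TLS
    }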
@@ -117,12 +117,11 @@ private: public: EmitAssemblyHelper(DiagnosticsEngine &_Diags, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, Module *M, - std::unique_ptr<FunctionInfoIndex> Index) + const LangOptions &LOpts, Module *M) : Diags(_Diags), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts), - TheModule(M), FunctionIndex(std::move(Index)), - CodeGenerationTime("Code Generation Time"), CodeGenPasses(nullptr), - PerModulePasses(nullptr), PerFunctionPasses(nullptr) {} + TheModule(M), CodeGenerationTime("Code Generation Time"), + CodeGenPasses(nullptr), PerModulePasses(nullptr), + PerFunctionPasses(nullptr) {} ~EmitAssemblyHelper() { delete CodeGenPasses; @@ -278,7 +277,7 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts, MPM->add(createRewriteSymbolsPass(DL)); } -void EmitAssemblyHelper::CreatePasses() { +void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { if (CodeGenOpts.DisableLLVMPasses) return; @@ -332,9 +331,8 @@ void EmitAssemblyHelper::CreatePasses() { // If we are performing a ThinLTO importing compile, invoke the LTO // pipeline and pass down the in-memory function index. - if (!CodeGenOpts.ThinLTOIndexFile.empty()) { - assert(FunctionIndex && "Expected non-empty function index"); - PMBuilder.FunctionIndex = FunctionIndex.get(); + if (FunctionIndex) { + PMBuilder.FunctionIndex = FunctionIndex; PMBuilder.populateLTOPassManager(*MPM); return; } @@ -642,7 +640,28 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, return; if (TM) TheModule->setDataLayout(TM->createDataLayout()); - CreatePasses(); + + // If we are performing a ThinLTO importing compile, load the function + // index into memory and pass it into CreatePasses, which will add it + // to the PassManagerBuilder and invoke LTO passes. 
+ std::unique_ptr<FunctionInfoIndex> FunctionIndex; + if (!CodeGenOpts.ThinLTOIndexFile.empty()) { + ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr = + llvm::getFunctionIndexForFile(CodeGenOpts.ThinLTOIndexFile, + [&](const DiagnosticInfo &DI) { + TheModule->getContext().diagnose(DI); + }); + if (std::error_code EC = IndexOrErr.getError()) { + std::string Error = EC.message(); + errs() << "Error loading index file '" << CodeGenOpts.ThinLTOIndexFile + << "': " << Error << "\n"; + return; + } + FunctionIndex = std::move(IndexOrErr.get()); + assert(FunctionIndex && "Expected non-empty function index"); + } + + CreatePasses(FunctionIndex.get()); switch (Action) { case Backend_EmitNothing: @@ -695,10 +714,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const clang::TargetOptions &TOpts, const LangOptions &LOpts, StringRef TDesc, Module *M, BackendAction Action, - raw_pwrite_stream *OS, - std::unique_ptr<FunctionInfoIndex> Index) { - EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M, - std::move(Index)); + raw_pwrite_stream *OS) { + EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M); AsmHelper.EmitAssembly(Action, OS); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 78e3978..5df8519 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2025,6 +2025,11 @@ llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) { return getOrCreateType(Ty->getValueType(), U); } +llvm::DIType* CGDebugInfo::CreateType(const PipeType *Ty, + llvm::DIFile *U) { + return getOrCreateType(Ty->getElementType(), U); +} + llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) { const EnumDecl *ED = Ty->getDecl(); @@ -2284,6 +2289,9 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Atomic: return CreateType(cast<AtomicType>(Ty), Unit); + case Type::Pipe: + return CreateType(cast<PipeType>(Ty), Unit); + case Type::TemplateSpecialization: return CreateType(cast<TemplateSpecializationType>(Ty), Unit); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h index 57d5c80..a68dd33f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h @@ -168,6 +168,7 @@ class CGDebugInfo { llvm::DIType *CreateType(const RValueReferenceType *Ty, llvm::DIFile *Unit); llvm::DIType *CreateType(const MemberPointerType *Ty, llvm::DIFile *F); llvm::DIType *CreateType(const AtomicType *Ty, llvm::DIFile *F); + llvm::DIType *CreateType(const PipeType *Ty, llvm::DIFile *F); /// Get enumeration type. 
llvm::DIType *CreateEnumType(const EnumType *Ty); llvm::DIType *CreateTypeDefinition(const EnumType *Ty); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp index dabd2b1..507ce3d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp @@ -3365,6 +3365,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_PointerToBoolean: case CK_VectorSplat: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: case CK_IntegralToFloating: case CK_FloatingToIntegral: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp index 20838db..a4547a9 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp @@ -721,6 +721,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_ToVoid: case CK_VectorSplat: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: case CK_IntegralToFloating: case CK_FloatingToIntegral: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp index ccdb532..22910d9 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprComplex.cpp @@ -462,6 +462,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_ToVoid: case CK_VectorSplat: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: case CK_IntegralToFloating: case CK_FloatingToIntegral: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp index 3839ab7..ee049f1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp @@ -735,6 +735,7 @@ public: case CK_PointerToBoolean: case CK_NullToPointer: case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_IntegralToPointer: case CK_IntegralToBoolean: case CK_IntegralToFloating: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp index 725d96f..268e796 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -811,14 +811,15 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // A scalar can be splatted to an extended vector of the same element type if (DstType->isExtVectorType() && !SrcType->isVectorType()) { - // Cast the scalar to element type - QualType EltTy = DstType->getAs<ExtVectorType>()->getElementType(); - llvm::Value *Elt = EmitScalarConversion( - Src, SrcType, EltTy, Loc, CGF.getContext().getLangOpts().OpenCL); + // Sema should add casts to make sure that the source expression's type is + // the same as the vector's element type (sans qualifiers) + assert(DstType->castAs<ExtVectorType>()->getElementType().getTypePtr() == + SrcType.getTypePtr() && + "Splatted expr doesn't match with vector element type?"); // Splat the element across to all elements unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); - return Builder.CreateVectorSplat(NumElements, Elt, "splat"); + return Builder.CreateVectorSplat(NumElements, Src, "splat"); } // Allow bitcast from vector to integer/fp of the same size. 
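The EmitScalarConversion hunk above inverts the old division of labour for vector splats: Sema (via the prepareVectorSplat declaration earlier in this diff) now converts the scalar to the vector's element type up front, and CodeGen merely replicates the converted value, asserting that the types already agree. The companion cast kind CK_BooleanToSignedIntegral maps true to -1 rather than 1, the all-ones lane value that OpenCL-style vector semantics expect. A toy model of the two steps, with stand-in names rather than clang code:

    #include <array>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Step 1, Sema's job: convert the scalar to the element type.
    // CK_BooleanToSignedIntegral turns true into -1, not 1.
    int32_t booleanToSignedIntegral(bool B) { return B ? -1 : 0; }

    // Step 2, CodeGen's job: splat the already-converted element. No further
    // conversion happens here, which is what the new assert enforces.
    template <std::size_t N>
    std::array<int32_t, N> vectorSplat(int32_t Elt) {
      std::array<int32_t, N> V;
      V.fill(Elt);
      return V;
    }

    int main() {
      auto V = vectorSplat<4>(booleanToSignedIntegral(true));
      assert(V[0] == -1 && V[3] == -1); // all lanes -1, as OpenCL requires
    }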
@@ -1541,15 +1542,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } case CK_VectorSplat: { llvm::Type *DstTy = ConvertType(DestTy); - // Need an IgnoreImpCasts here as by default a boolean will be promoted to - // an int, which will not perform the sign extension, so if we know we are - // going to cast to a vector we have to strip the implicit cast off. - Value *Elt = Visit(const_cast<Expr*>(E->IgnoreImpCasts())); - Elt = EmitScalarConversion(Elt, E->IgnoreImpCasts()->getType(), - DestTy->getAs<VectorType>()->getElementType(), - CE->getExprLoc(), - CGF.getContext().getLangOpts().OpenCL); - + Value *Elt = Visit(const_cast<Expr*>(E)); // Splat the element across to all elements unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); return Builder.CreateVectorSplat(NumElements, Elt, "splat"); @@ -1561,6 +1554,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_FloatingCast: return EmitScalarConversion(Visit(E), E->getType(), DestTy, CE->getExprLoc()); + case CK_BooleanToSignedIntegral: + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc(), + /*TreatBooleanAsSigned=*/true); case CK_IntegralToBoolean: return EmitIntToBoolConversion(Visit(E)); case CK_PointerToBoolean: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp index 8af39ce..6866789 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp @@ -99,3 +99,14 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { llvm::StructType::create(Ctx, "opencl.reserve_id_t"), 0); } } + +llvm::Type *CGOpenCLRuntime::getPipeType() { + if (!PipeTy){ + uint32_t PipeAddrSpc = + CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + PipeTy = llvm::PointerType::get(llvm::StructType::create( + CGM.getLLVMContext(), "opencl.pipe_t"), PipeAddrSpc); + } + + return PipeTy; +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.h index 0c50b92..f1a7a31 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.h @@ -32,9 +32,10 @@ class CodeGenModule; class CGOpenCLRuntime { protected: CodeGenModule &CGM; + llvm::Type *PipeTy; public: - CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM) {} + CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM), PipeTy(nullptr) {} virtual ~CGOpenCLRuntime(); /// Emit the IR required for a work-group-local variable declaration, and add @@ -44,6 +45,8 @@ public: const VarDecl &D); virtual llvm::Type *convertOpenCLSpecificType(const Type *T); + + virtual llvm::Type *getPipeType(); }; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 6d4fc9f..3b97ba2 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -84,7 +84,7 @@ public: protected: CGOpenMPRegionKind RegionKind; - const RegionCodeGenTy &CodeGen; + RegionCodeGenTy CodeGen; OpenMPDirectiveKind Kind; bool HasCancel; }; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h index 6b04fbe..b325637 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -335,7 +335,7 @@ private: public: /// 
\brief Kind of a given entry. Currently, only target regions are /// supported. - enum OffloadingEntryInfoKinds { + enum OffloadingEntryInfoKinds : unsigned { // Entry is a target region. OFFLOAD_ENTRY_INFO_TARGET_REGION = 0, // Invalid entry info. @@ -955,7 +955,7 @@ public: /// \brief Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if /// \a GD was dealt with successfully. - /// \param FD Function to scan. + /// \param GD Function to scan. virtual bool emitTargetFunctions(GlobalDecl GD); /// \brief Emit the global variable if it is a valid device global variable. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp index abef543..0a670ab 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp @@ -26,12 +26,10 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" -#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" -#include "llvm/Object/FunctionIndexObjectFile.h" #include "llvm/Pass.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" @@ -781,43 +779,11 @@ void CodeGenAction::ExecuteAction() { TheModule->setTargetTriple(TargetOpts.Triple); } - auto DiagHandler = [&](const DiagnosticInfo &DI) { - TheModule->getContext().diagnose(DI); - }; - - // If we are performing ThinLTO importing compilation (indicated by - // a non-empty index file option), then we need promote to global scope - // and rename any local values that are potentially exported to other - // modules. Do this early so that the rest of the compilation sees the - // promoted symbols. - std::unique_ptr<FunctionInfoIndex> Index; - if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) { - ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr = - llvm::getFunctionIndexForFile(CI.getCodeGenOpts().ThinLTOIndexFile, - DiagHandler); - if (std::error_code EC = IndexOrErr.getError()) { - std::string Error = EC.message(); - errs() << "Error loading index file '" - << CI.getCodeGenOpts().ThinLTOIndexFile << "': " << Error - << "\n"; - return; - } - Index = std::move(IndexOrErr.get()); - assert(Index); - // Currently this requires creating a new Module object. 
- std::unique_ptr<llvm::Module> RenamedModule = - renameModuleForThinLTO(std::move(TheModule), Index.get()); - if (!RenamedModule) - return; - - TheModule = std::move(RenamedModule); - } - LLVMContext &Ctx = TheModule->getContext(); Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler); EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts, CI.getLangOpts(), CI.getTarget().getDataLayoutString(), - TheModule.get(), BA, OS, std::move(Index)); + TheModule.get(), BA, OS); return; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp index 048a043..e38ff0a 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp @@ -79,7 +79,7 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) if (CGM.getCodeGenOpts().ReciprocalMath) { FMF.setAllowReciprocal(); } - Builder.SetFastMathFlags(FMF); + Builder.setFastMathFlags(FMF); } CodeGenFunction::~CodeGenFunction() { @@ -195,6 +195,7 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { case Type::FunctionNoProto: case Type::Enum: case Type::ObjCObjectPointer: + case Type::Pipe: return TEK_Scalar; // Complexes. @@ -511,7 +512,8 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, typeQuals += typeQuals.empty() ? "volatile" : " volatile"; } else { uint32_t AddrSpc = 0; - if (ty->isImageType()) + bool isPipe = ty->isPipeType(); + if (ty->isImageType() || isPipe) AddrSpc = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); @@ -519,7 +521,11 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, llvm::ConstantAsMetadata::get(Builder.getInt32(AddrSpc))); // Get argument type name. - std::string typeName = ty.getUnqualifiedType().getAsString(Policy); + std::string typeName; + if (isPipe) + typeName = cast<PipeType>(ty)->getElementType().getAsString(Policy); + else + typeName = ty.getUnqualifiedType().getAsString(Policy); // Turn "unsigned type" to "utype" std::string::size_type pos = typeName.find("unsigned"); @@ -528,7 +534,12 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, argTypeNames.push_back(llvm::MDString::get(Context, typeName)); - std::string baseTypeName = + std::string baseTypeName; + if (isPipe) + baseTypeName = + cast<PipeType>(ty)->getElementType().getCanonicalType().getAsString(Policy); + else + baseTypeName = ty.getUnqualifiedType().getCanonicalType().getAsString(Policy); // Turn "unsigned type" to "utype" @@ -543,12 +554,16 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, typeQuals = "const"; if (ty.isVolatileQualified()) typeQuals += typeQuals.empty() ? 
"volatile" : " volatile"; + if (isPipe) + typeQuals = "pipe"; } argTypeQuals.push_back(llvm::MDString::get(Context, typeQuals)); - // Get image access qualifier: - if (ty->isImageType()) { + // Get image and pipe access qualifier: + // FIXME: now image and pipe share the same access qualifier maybe we can + // refine it to OpenCL access qualifier and also handle write_read + if (ty->isImageType()|| ty->isPipeType()) { const OpenCLImageAccessAttr *A = parm->getAttr<OpenCLImageAccessAttr>(); if (A && A->isWriteOnly()) accessQuals.push_back(llvm::MDString::get(Context, "write_only")); @@ -1727,6 +1742,10 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::Atomic: type = cast<AtomicType>(ty)->getValueType(); break; + + case Type::Pipe: + type = cast<PipeType>(ty)->getElementType(); + break; } } while (type->isVariablyModifiedType()); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp index 536c55a..97b1662 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -615,7 +615,20 @@ void CodeGenModule::setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const { } StringRef CodeGenModule::getMangledName(GlobalDecl GD) { - StringRef &FoundStr = MangledDeclNames[GD.getCanonicalDecl()]; + GlobalDecl CanonicalGD = GD.getCanonicalDecl(); + + // Some ABIs don't have constructor variants. Make sure that base and + // complete constructors get mangled the same. + if (const auto *CD = dyn_cast<CXXConstructorDecl>(CanonicalGD.getDecl())) { + if (!getTarget().getCXXABI().hasConstructorVariants()) { + CXXCtorType OrigCtorType = GD.getCtorType(); + assert(OrigCtorType == Ctor_Base || OrigCtorType == Ctor_Complete); + if (OrigCtorType == Ctor_Base) + CanonicalGD = GlobalDecl(CD, Ctor_Complete); + } + } + + StringRef &FoundStr = MangledDeclNames[CanonicalGD]; if (!FoundStr.empty()) return FoundStr; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp index 5ae861e..2c0d93b 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp @@ -699,7 +699,7 @@ CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name, setFuncName(Name, Linkage); CGM.getCoverageMapping()->addFunctionMappingRecord( - FuncNameVar, FuncName, FunctionHash, CoverageMapping); + FuncNameVar, FuncName, FunctionHash, CoverageMapping, false); } void CodeGenPGO::computeRegionCounts(const Decl *D) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp index fcda053..09d9bf1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -628,6 +628,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { } break; } + case Type::Pipe: { + ResultType = CGM.getOpenCLRuntime().getPipeType(); + break; + } } assert(ResultType && "Didn't convert a type?"); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp index 1d4d709..03e22cd 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -910,11 +910,11 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, } void CoverageMappingModuleGen::addFunctionMappingRecord( - llvm::GlobalVariable 
*NamePtr, StringRef NameValue, - uint64_t FuncHash, const std::string &CoverageMapping) { + llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash, + const std::string &CoverageMapping, bool isUsed) { llvm::LLVMContext &Ctx = CGM.getLLVMContext(); if (!FunctionRecordTy) { - #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType, +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType, llvm::Type *FunctionRecordTypes[] = { #include "llvm/ProfileData/InstrProfData.inc" }; @@ -929,6 +929,9 @@ void CoverageMappingModuleGen::addFunctionMappingRecord( }; FunctionRecords.push_back(llvm::ConstantStruct::get( FunctionRecordTy, makeArrayRef(FunctionRecordVals))); + if (!isUsed) + FunctionNames.push_back( + llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx))); CoverageMappings += CoverageMapping; if (CGM.getCodeGenOpts().DumpCoverageMapping) { @@ -1023,6 +1026,17 @@ void CoverageMappingModuleGen::emit() { // Make sure the data doesn't get deleted. CGM.addUsedGlobal(CovData); + // Create the deferred function records array + if (!FunctionNames.empty()) { + auto NamesArrTy = llvm::ArrayType::get(llvm::Type::getInt8PtrTy(Ctx), + FunctionNames.size()); + auto NamesArrVal = llvm::ConstantArray::get(NamesArrTy, FunctionNames); + // This variable will *NOT* be emitted to the object file. It is used + // to pass the list of names referenced to codegen. + new llvm::GlobalVariable(CGM.getModule(), NamesArrTy, true, + llvm::GlobalValue::InternalLinkage, NamesArrVal, + llvm::getCoverageNamesVarName()); + } } unsigned CoverageMappingModuleGen::getFileID(const FileEntry *File) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h index 0d1bf6d..9ae2bcf 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h @@ -54,6 +54,7 @@ class CoverageMappingModuleGen { CoverageSourceInfo &SourceInfo; llvm::SmallDenseMap<const FileEntry *, unsigned, 8> FileEntries; std::vector<llvm::Constant *> FunctionRecords; + std::vector<llvm::Constant *> FunctionNames; llvm::StructType *FunctionRecordTy; std::string CoverageMappings; @@ -70,7 +71,8 @@ public: void addFunctionMappingRecord(llvm::GlobalVariable *FunctionName, StringRef FunctionNameValue, uint64_t FunctionHash, - const std::string &CoverageMapping); + const std::string &CoverageMapping, + bool isUsed = true); /// \brief Emit the coverage mapping data for a translation unit. void emit(); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp index 0c4008f..e02c8dc 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2715,6 +2715,9 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { case Type::Auto: llvm_unreachable("Undeduced auto type shouldn't get here"); + case Type::Pipe: + llvm_unreachable("Pipe types shouldn't get here"); + case Type::Builtin: // GCC treats vector and complex types as fundamental types. 
case Type::Vector: @@ -2939,6 +2942,9 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force) { case Type::Auto: llvm_unreachable("Undeduced auto type shouldn't get here"); + case Type::Pipe: + llvm_unreachable("Pipe type shouldn't get here"); + case Type::ConstantArray: case Type::IncompleteArray: case Type::VariableArray: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index b397eb3..f385e53 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -59,8 +59,10 @@ class PCHContainerGenerator : public ASTConsumer { struct DebugTypeVisitor : public RecursiveASTVisitor<DebugTypeVisitor> { clang::CodeGen::CGDebugInfo &DI; ASTContext &Ctx; - DebugTypeVisitor(clang::CodeGen::CGDebugInfo &DI, ASTContext &Ctx) - : DI(DI), Ctx(Ctx) {} + bool SkipTagDecls; + DebugTypeVisitor(clang::CodeGen::CGDebugInfo &DI, ASTContext &Ctx, + bool SkipTagDecls) + : DI(DI), Ctx(Ctx), SkipTagDecls(SkipTagDecls) {} /// Determine whether this type can be represented in DWARF. static bool CanRepresent(const Type *Ty) { @@ -75,6 +77,12 @@ class PCHContainerGenerator : public ASTConsumer { } bool VisitTypeDecl(TypeDecl *D) { + // TagDecls may be deferred until after all decls have been merged and we + // know the complete type. Pure forward declarations will be skipped, but + // they don't need to be emitted into the module anyway. + if (SkipTagDecls && isa<TagDecl>(D)) + return true; + QualType QualTy = Ctx.getTypeDeclType(D); if (!QualTy.isNull() && CanRepresent(QualTy.getTypePtr())) DI.getOrCreateStandaloneType(QualTy, D->getLocation()); @@ -165,7 +173,7 @@ public: // Collect debug info for all decls in this group. for (auto *I : D) if (!I->isFromASTFile()) { - DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx); + DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx, true); DTV.TraverseDecl(I); } return true; @@ -179,6 +187,11 @@ public: if (Diags.hasErrorOccurred()) return; + if (D->isFromASTFile()) + return; + + DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx, false); + DTV.TraverseDecl(D); Builder->UpdateCompletedType(D); } diff --git a/contrib/llvm/tools/clang/lib/Driver/Action.cpp b/contrib/llvm/tools/clang/lib/Driver/Action.cpp index 49dccd2..e9490e9 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Action.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Action.cpp @@ -8,17 +8,14 @@ //===----------------------------------------------------------------------===// #include "clang/Driver/Action.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Regex.h" #include <cassert> using namespace clang::driver; using namespace llvm::opt; -Action::~Action() { - if (OwnsInputs) { - for (iterator it = begin(), ie = end(); it != ie; ++it) - delete *it; - } -} +Action::~Action() {} const char *Action::getClassName(ActionClass AC) { switch (AC) { @@ -51,33 +48,53 @@ InputAction::InputAction(const Arg &_Input, types::ID _Type) void BindArchAction::anchor() {} -BindArchAction::BindArchAction(std::unique_ptr<Action> Input, - const char *_ArchName) - : Action(BindArchClass, std::move(Input)), ArchName(_ArchName) {} +BindArchAction::BindArchAction(Action *Input, const char *_ArchName) + : Action(BindArchClass, Input), ArchName(_ArchName) {} + +// Converts CUDA GPU architecture, e.g. 
"sm_21", to its corresponding virtual +// compute arch, e.g. "compute_20". Returns null if the input arch is null or +// doesn't match an existing arch. +static const char* GpuArchToComputeName(const char *ArchName) { + if (!ArchName) + return nullptr; + return llvm::StringSwitch<const char *>(ArchName) + .Cases("sm_20", "sm_21", "compute_20") + .Case("sm_30", "compute_30") + .Case("sm_32", "compute_32") + .Case("sm_35", "compute_35") + .Case("sm_37", "compute_37") + .Case("sm_50", "compute_50") + .Case("sm_52", "compute_52") + .Case("sm_53", "compute_53") + .Default(nullptr); +} void CudaDeviceAction::anchor() {} -CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input, - const char *ArchName, bool AtTopLevel) - : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName), - AtTopLevel(AtTopLevel) {} - -void CudaHostAction::anchor() {} +CudaDeviceAction::CudaDeviceAction(Action *Input, const char *ArchName, + bool AtTopLevel) + : Action(CudaDeviceClass, Input), GpuArchName(ArchName), + AtTopLevel(AtTopLevel) { + assert(IsValidGpuArchName(GpuArchName)); +} -CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input, - const ActionList &DeviceActions) - : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions) {} +const char *CudaDeviceAction::getComputeArchName() const { + return GpuArchToComputeName(GpuArchName); +} -CudaHostAction::~CudaHostAction() { - for (auto &DA : DeviceActions) - delete DA; +bool CudaDeviceAction::IsValidGpuArchName(llvm::StringRef ArchName) { + return GpuArchToComputeName(ArchName.data()) != nullptr; } +void CudaHostAction::anchor() {} + +CudaHostAction::CudaHostAction(Action *Input, const ActionList &DeviceActions) + : Action(CudaHostClass, Input), DeviceActions(DeviceActions) {} + void JobAction::anchor() {} -JobAction::JobAction(ActionClass Kind, std::unique_ptr<Action> Input, - types::ID Type) - : Action(Kind, std::move(Input), Type) {} +JobAction::JobAction(ActionClass Kind, Action *Input, types::ID Type) + : Action(Kind, Input, Type) {} JobAction::JobAction(ActionClass Kind, const ActionList &Inputs, types::ID Type) : Action(Kind, Inputs, Type) { @@ -85,45 +102,38 @@ JobAction::JobAction(ActionClass Kind, const ActionList &Inputs, types::ID Type) void PreprocessJobAction::anchor() {} -PreprocessJobAction::PreprocessJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(PreprocessJobClass, std::move(Input), OutputType) {} +PreprocessJobAction::PreprocessJobAction(Action *Input, types::ID OutputType) + : JobAction(PreprocessJobClass, Input, OutputType) {} void PrecompileJobAction::anchor() {} -PrecompileJobAction::PrecompileJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(PrecompileJobClass, std::move(Input), OutputType) {} +PrecompileJobAction::PrecompileJobAction(Action *Input, types::ID OutputType) + : JobAction(PrecompileJobClass, Input, OutputType) {} void AnalyzeJobAction::anchor() {} -AnalyzeJobAction::AnalyzeJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(AnalyzeJobClass, std::move(Input), OutputType) {} +AnalyzeJobAction::AnalyzeJobAction(Action *Input, types::ID OutputType) + : JobAction(AnalyzeJobClass, Input, OutputType) {} void MigrateJobAction::anchor() {} -MigrateJobAction::MigrateJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(MigrateJobClass, std::move(Input), OutputType) {} +MigrateJobAction::MigrateJobAction(Action *Input, types::ID OutputType) + : JobAction(MigrateJobClass, Input, OutputType) {} 
void CompileJobAction::anchor() {} -CompileJobAction::CompileJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(CompileJobClass, std::move(Input), OutputType) {} +CompileJobAction::CompileJobAction(Action *Input, types::ID OutputType) + : JobAction(CompileJobClass, Input, OutputType) {} void BackendJobAction::anchor() {} -BackendJobAction::BackendJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(BackendJobClass, std::move(Input), OutputType) {} +BackendJobAction::BackendJobAction(Action *Input, types::ID OutputType) + : JobAction(BackendJobClass, Input, OutputType) {} void AssembleJobAction::anchor() {} -AssembleJobAction::AssembleJobAction(std::unique_ptr<Action> Input, - types::ID OutputType) - : JobAction(AssembleJobClass, std::move(Input), OutputType) {} +AssembleJobAction::AssembleJobAction(Action *Input, types::ID OutputType) + : JobAction(AssembleJobClass, Input, OutputType) {} void LinkJobAction::anchor() {} @@ -145,21 +155,20 @@ DsymutilJobAction::DsymutilJobAction(ActionList &Inputs, types::ID Type) void VerifyJobAction::anchor() {} -VerifyJobAction::VerifyJobAction(ActionClass Kind, - std::unique_ptr<Action> Input, types::ID Type) - : JobAction(Kind, std::move(Input), Type) { +VerifyJobAction::VerifyJobAction(ActionClass Kind, Action *Input, + types::ID Type) + : JobAction(Kind, Input, Type) { assert((Kind == VerifyDebugInfoJobClass || Kind == VerifyPCHJobClass) && "ActionClass is not a valid VerifyJobAction"); } void VerifyDebugInfoJobAction::anchor() {} -VerifyDebugInfoJobAction::VerifyDebugInfoJobAction( - std::unique_ptr<Action> Input, types::ID Type) - : VerifyJobAction(VerifyDebugInfoJobClass, std::move(Input), Type) {} +VerifyDebugInfoJobAction::VerifyDebugInfoJobAction(Action *Input, + types::ID Type) + : VerifyJobAction(VerifyDebugInfoJobClass, Input, Type) {} void VerifyPCHJobAction::anchor() {} -VerifyPCHJobAction::VerifyPCHJobAction(std::unique_ptr<Action> Input, - types::ID Type) - : VerifyJobAction(VerifyPCHJobClass, std::move(Input), Type) {} +VerifyPCHJobAction::VerifyPCHJobAction(Action *Input, types::ID Type) + : VerifyJobAction(VerifyPCHJobClass, Input, Type) {} diff --git a/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp b/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp index e4af2a6..1c2eecd 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp @@ -40,11 +40,6 @@ Compilation::~Compilation() { if (it->second != TranslatedArgs) delete it->second; - // Free the actions, if built. - for (ActionList::iterator it = Actions.begin(), ie = Actions.end(); - it != ie; ++it) - delete *it; - // Free redirections of stdout/stderr. if (Redirects) { delete Redirects[1]; @@ -208,7 +203,8 @@ void Compilation::initCompilationForDiagnostics() { ForDiagnostics = true; // Free actions and jobs. - DeleteContainerPointers(Actions); + Actions.clear(); + AllActions.clear(); Jobs.clear(); // Clear temporary/results file lists. 
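All of the deleted destructor loops above (Action, CudaHostAction, Compilation) trace back to one ownership change: actions are now created through Compilation::MakeAction<> and owned centrally, so every other Action* is a plain non-owning view. A hedged sketch of that pattern; the real MakeAction in this tree should be analogous but is not reproduced here:

    #include <memory>
    #include <utility>
    #include <vector>

    struct Action {
      virtual ~Action() = default;
    };

    class Compilation {
      // Single owner for every Action ever built; consumers and parent
      // actions never call delete, so no OwnsInputs bookkeeping is needed.
      std::vector<std::unique_ptr<Action>> AllActions;

    public:
      template <typename T, typename... ArgTys>
      T *MakeAction(ArgTys &&... Args) {
        T *Raw = new T(std::forward<ArgTys>(Args)...);
        AllActions.emplace_back(Raw);
        return Raw;
      }

      // Mirrors initCompilationForDiagnostics above: dropping every action
      // is now a plain clear() instead of a manual delete loop.
      void clearActions() { AllActions.clear(); }
    };

This also makes it safe for several consumers to reference the same Action node, which the old per-consumer deletion scheme could not express without setOwnsInputs(false) hacks.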
diff --git a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp index 85bbcb4..1e0a48d 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp @@ -1049,19 +1049,15 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC, << types::getTypeName(Act->getType()); ActionList Inputs; - for (unsigned i = 0, e = Archs.size(); i != e; ++i) { - Inputs.push_back( - new BindArchAction(std::unique_ptr<Action>(Act), Archs[i])); - if (i != 0) - Inputs.back()->setOwnsInputs(false); - } + for (unsigned i = 0, e = Archs.size(); i != e; ++i) + Inputs.push_back(C.MakeAction<BindArchAction>(Act, Archs[i])); // Lipo if necessary, we do it this way because we need to set the arch flag // so that -Xarch_ gets overwritten. if (Inputs.size() == 1 || Act->getType() == types::TY_Nothing) Actions.append(Inputs.begin(), Inputs.end()); else - Actions.push_back(new LipoJobAction(Inputs, Act->getType())); + Actions.push_back(C.MakeAction<LipoJobAction>(Inputs, Act->getType())); // Handle debug info queries. Arg *A = Args.getLastArg(options::OPT_g_Group); @@ -1077,15 +1073,16 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC, ActionList Inputs; Inputs.push_back(Actions.back()); Actions.pop_back(); - Actions.push_back(new DsymutilJobAction(Inputs, types::TY_dSYM)); + Actions.push_back( + C.MakeAction<DsymutilJobAction>(Inputs, types::TY_dSYM)); } // Verify the debug info output. if (Args.hasArg(options::OPT_verify_debug_info)) { - std::unique_ptr<Action> VerifyInput(Actions.back()); + Action* LastAction = Actions.back(); Actions.pop_back(); - Actions.push_back(new VerifyDebugInfoJobAction(std::move(VerifyInput), - types::TY_Nothing)); + Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>( + LastAction, types::TY_Nothing)); } } } @@ -1283,26 +1280,29 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, // Actions and \p Current is released. Otherwise the function creates // and returns a new CudaHostAction which wraps \p Current and device // side actions. -static std::unique_ptr<Action> -buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg, - std::unique_ptr<Action> HostAction, ActionList &Actions) { +static Action *buildCudaActions(Compilation &C, DerivedArgList &Args, + const Arg *InputArg, Action *HostAction, + ActionList &Actions) { Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only, options::OPT_cuda_device_only); // Host-only compilation case. if (PartialCompilationArg && PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only)) - return std::unique_ptr<Action>( - new CudaHostAction(std::move(HostAction), {})); + return C.MakeAction<CudaHostAction>(HostAction, ActionList()); // Collect all cuda_gpu_arch parameters, removing duplicates. SmallVector<const char *, 4> GpuArchList; llvm::StringSet<> GpuArchNames; for (Arg *A : Args) { - if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) { - A->claim(); - if (GpuArchNames.insert(A->getValue()).second) - GpuArchList.push_back(A->getValue()); - } + if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) + continue; + A->claim(); + + const auto& Arch = A->getValue(); + if (!CudaDeviceAction::IsValidGpuArchName(Arch)) + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << Arch; + else if (GpuArchNames.insert(Arch).second) + GpuArchList.push_back(Arch); } // Default to sm_20 which is the lowest common denominator for supported GPUs.
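The rewritten --cuda-gpu-arch loop folds together claiming, validation (with the err_drv_cuda_bad_gpu_arch diagnostic), and order-preserving deduplication via llvm::StringSet. A minimal sketch of just the dedup half, assuming StringSet::insert returns an (iterator, inserted) pair as the loop above relies on (uniqueArchs is illustrative):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringSet.h"

    // First-seen command-line order is preserved, which a bare set would
    // lose; that ordering matters when arch lists feed later decisions.
    static llvm::SmallVector<const char *, 4>
    uniqueArchs(llvm::ArrayRef<const char *> Archs) {
      llvm::StringSet<> Seen;
      llvm::SmallVector<const char *, 4> Out;
      for (const char *A : Archs)
        if (Seen.insert(A).second)
          Out.push_back(A);
      return Out;
    }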
@@ -1325,13 +1325,10 @@ buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg, "Failed to create actions for all devices"); // Check whether any of device actions stopped before they could generate PTX. - bool PartialCompilation = false; - for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { - if (CudaDeviceActions[I]->getKind() != Action::BackendJobClass) { - PartialCompilation = true; - break; - } - } + bool PartialCompilation = + llvm::any_of(CudaDeviceActions, [](const Action *a) { + return a->getKind() != Action::BackendJobClass; + }); // Figure out what to do with device actions -- pass them as inputs to the // host action or run each of them independently. @@ -1350,12 +1347,12 @@ buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg, } for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - Actions.push_back(new CudaDeviceAction( - std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I], - /* AtTopLevel */ true)); + Actions.push_back(C.MakeAction<CudaDeviceAction>(CudaDeviceActions[I], + GpuArchList[I], + /* AtTopLevel */ true)); // Kill host action in case of device-only compilation. if (DeviceOnlyCompilation) - HostAction.reset(nullptr); + return nullptr; return HostAction; } @@ -1363,13 +1360,12 @@ buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg, // with AtTopLevel=false and become inputs for the host action. ActionList DeviceActions; for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - DeviceActions.push_back(new CudaDeviceAction( - std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I], - /* AtTopLevel */ false)); + DeviceActions.push_back( + C.MakeAction<CudaDeviceAction>(CudaDeviceActions[I], GpuArchList[I], + /* AtTopLevel */ false)); // Return a new host action that incorporates original host action and all // device actions. - return std::unique_ptr<Action>( - new CudaHostAction(std::move(HostAction), DeviceActions)); + return C.MakeAction<CudaHostAction>(HostAction, DeviceActions); } void Driver::BuildActions(Compilation &C, const ToolChain &TC, @@ -1470,15 +1466,14 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC, continue; } - phases::ID CudaInjectionPhase = FinalPhase; - for (const auto &Phase : PL) - if (Phase <= FinalPhase && Phase == phases::Compile) { - CudaInjectionPhase = Phase; - break; - } + phases::ID CudaInjectionPhase = + (phases::Compile < FinalPhase && + llvm::find(PL, phases::Compile) != PL.end()) + ? phases::Compile + : FinalPhase; // Build the pipeline for this file. - std::unique_ptr<Action> Current(new InputAction(*InputArg, InputType)); + Action *Current = C.MakeAction<InputAction>(*InputArg, InputType); for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end(); i != e; ++i) { phases::ID Phase = *i; @@ -1490,7 +1485,8 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC, // Queue linker inputs. if (Phase == phases::Link) { assert((i + 1) == e && "linking must be final compilation step."); - LinkerInputs.push_back(Current.release()); + LinkerInputs.push_back(Current); + Current = nullptr; break; } @@ -1501,11 +1497,10 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC, continue; // Otherwise construct the appropriate action. 
- Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current)); + Current = ConstructPhaseAction(C, TC, Args, Phase, Current); if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) { - Current = - buildCudaActions(C, Args, InputArg, std::move(Current), Actions); + Current = buildCudaActions(C, Args, InputArg, Current, Actions); if (!Current) break; } @@ -1516,12 +1511,13 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC, // If we ended with something, add to the output list. if (Current) - Actions.push_back(Current.release()); + Actions.push_back(Current); } // Add a link action if necessary. if (!LinkerInputs.empty()) - Actions.push_back(new LinkJobAction(LinkerInputs, types::TY_Image)); + Actions.push_back( + C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image)); // If we are linking, claim any options which are obviously only used for // compilation. @@ -1538,10 +1534,9 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC, Args.ClaimAllArgs(options::OPT_cuda_host_only); } -std::unique_ptr<Action> -Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args, - phases::ID Phase, - std::unique_ptr<Action> Input) const { +Action *Driver::ConstructPhaseAction(Compilation &C, const ToolChain &TC, + const ArgList &Args, phases::ID Phase, + Action *Input) const { llvm::PrettyStackTraceString CrashInfo("Constructing phase actions"); // Build the appropriate action. switch (Phase) { @@ -1561,7 +1556,7 @@ Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args, assert(OutputTy != types::TY_INVALID && "Cannot preprocess this input type!"); } - return llvm::make_unique<PreprocessJobAction>(std::move(Input), OutputTy); + return C.MakeAction<PreprocessJobAction>(Input, OutputTy); } case phases::Precompile: { types::ID OutputTy = types::TY_PCH; @@ -1569,53 +1564,43 @@ Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args, // Syntax checks should not emit a PCH file OutputTy = types::TY_Nothing; } - return llvm::make_unique<PrecompileJobAction>(std::move(Input), OutputTy); + return C.MakeAction<PrecompileJobAction>(Input, OutputTy); } case phases::Compile: { if (Args.hasArg(options::OPT_fsyntax_only)) - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_Nothing); + return C.MakeAction<CompileJobAction>(Input, types::TY_Nothing); if (Args.hasArg(options::OPT_rewrite_objc)) - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_RewrittenObjC); + return C.MakeAction<CompileJobAction>(Input, types::TY_RewrittenObjC); if (Args.hasArg(options::OPT_rewrite_legacy_objc)) - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_RewrittenLegacyObjC); + return C.MakeAction<CompileJobAction>(Input, + types::TY_RewrittenLegacyObjC); if (Args.hasArg(options::OPT__analyze, options::OPT__analyze_auto)) - return llvm::make_unique<AnalyzeJobAction>(std::move(Input), - types::TY_Plist); + return C.MakeAction<AnalyzeJobAction>(Input, types::TY_Plist); if (Args.hasArg(options::OPT__migrate)) - return llvm::make_unique<MigrateJobAction>(std::move(Input), - types::TY_Remap); + return C.MakeAction<MigrateJobAction>(Input, types::TY_Remap); if (Args.hasArg(options::OPT_emit_ast)) - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_AST); + return C.MakeAction<CompileJobAction>(Input, types::TY_AST); if (Args.hasArg(options::OPT_module_file_info)) - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_ModuleFile); + return 
C.MakeAction<CompileJobAction>(Input, types::TY_ModuleFile); if (Args.hasArg(options::OPT_verify_pch)) - return llvm::make_unique<VerifyPCHJobAction>(std::move(Input), - types::TY_Nothing); - return llvm::make_unique<CompileJobAction>(std::move(Input), - types::TY_LLVM_BC); + return C.MakeAction<VerifyPCHJobAction>(Input, types::TY_Nothing); + return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC); } case phases::Backend: { if (isUsingLTO()) { types::ID Output = Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; - return llvm::make_unique<BackendJobAction>(std::move(Input), Output); + return C.MakeAction<BackendJobAction>(Input, Output); } if (Args.hasArg(options::OPT_emit_llvm)) { types::ID Output = Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC; - return llvm::make_unique<BackendJobAction>(std::move(Input), Output); + return C.MakeAction<BackendJobAction>(Input, Output); } - return llvm::make_unique<BackendJobAction>(std::move(Input), - types::TY_PP_Asm); + return C.MakeAction<BackendJobAction>(Input, types::TY_PP_Asm); } case phases::Assemble: - return llvm::make_unique<AssembleJobAction>(std::move(Input), - types::TY_Object); + return C.MakeAction<AssembleJobAction>(Input, types::TY_Object); } llvm_unreachable("invalid phase in ConstructPhaseAction"); @@ -1662,12 +1647,11 @@ void Driver::BuildJobs(Compilation &C) const { LinkingOutput = getDefaultImageName(); } - InputInfo II; BuildJobsForAction(C, A, &C.getDefaultToolChain(), /*BoundArch*/ nullptr, /*AtTopLevel*/ true, /*MultipleArchs*/ ArchNames.size() > 1, - /*LinkingOutput*/ LinkingOutput, II); + /*LinkingOutput*/ LinkingOutput); } // If the user passed -Qunused-arguments or there were errors, don't warn @@ -1795,21 +1779,19 @@ static const Tool *selectToolForJob(Compilation &C, bool SaveTemps, return ToolForJob; } -void Driver::BuildJobsForAction(Compilation &C, const Action *A, - const ToolChain *TC, const char *BoundArch, - bool AtTopLevel, bool MultipleArchs, - const char *LinkingOutput, - InputInfo &Result) const { +InputInfo Driver::BuildJobsForAction(Compilation &C, const Action *A, + const ToolChain *TC, const char *BoundArch, + bool AtTopLevel, bool MultipleArchs, + const char *LinkingOutput) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); InputInfoList CudaDeviceInputInfos; if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) { - InputInfo II; // Append outputs of device jobs to the input list. for (const Action *DA : CHA->getDeviceActions()) { - BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel, - /*MultipleArchs*/ false, LinkingOutput, II); - CudaDeviceInputInfos.push_back(II); + CudaDeviceInputInfos.push_back( + BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel, + /*MultipleArchs*/ false, LinkingOutput)); } // Override current action with a real host compile action and continue // processing it. 
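BuildJobsForAction now returns its InputInfo instead of threading an InputInfo &Result out-parameter through every recursive call, which is what lets the CUDA paths above collapse into single expressions. A reduced sketch of the new shape, with hypothetical stand-in types:

    #include <string>
    #include <vector>

    struct InputInfo {
      std::string File;
    };

    struct Action {
      std::string Name;
      std::vector<const Action *> Inputs;
    };

    // Hypothetical stand-in for constructing the job at one node.
    static InputInfo runTool(const Action &A, const std::vector<InputInfo> &) {
      return InputInfo{A.Name + ".o"};
    }

    // Same shape as the refactored BuildJobsForAction: each recursive call
    // yields its result directly, so callers need no scratch II variables.
    static InputInfo build(const Action &A) {
      std::vector<InputInfo> Ins;
      for (const Action *In : A.Inputs)
        Ins.push_back(build(*In));
      return runTool(A, Ins);
    }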
@@ -1823,11 +1805,9 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, Input.claim(); if (Input.getOption().matches(options::OPT_INPUT)) { const char *Name = Input.getValue(); - Result = InputInfo(Name, A->getType(), Name); - } else { - Result = InputInfo(&Input, A->getType(), ""); + return InputInfo(A, Name, /* BaseInput = */ Name); } - return; + return InputInfo(A, &Input, /* BaseInput = */ ""); } if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) { @@ -1841,19 +1821,17 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, else TC = &C.getDefaultToolChain(); - BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel, - MultipleArchs, LinkingOutput, Result); - return; + return BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel, + MultipleArchs, LinkingOutput); } if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { // Initial processing of CudaDeviceAction carries host params. // Call BuildJobsForAction() again, now with correct device parameters. assert(CDA->getGpuArchName() && "No GPU name in device action."); - BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(), - CDA->getGpuArchName(), CDA->isAtTopLevel(), - /*MultipleArchs*/ true, LinkingOutput, Result); - return; + return BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(), + CDA->getGpuArchName(), CDA->isAtTopLevel(), + /*MultipleArchs*/ true, LinkingOutput); } const ActionList *Inputs = &A->getInputs(); @@ -1863,16 +1841,15 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, const Tool *T = selectToolForJob(C, isSaveTempsEnabled(), TC, JA, Inputs, CollapsedCHA); if (!T) - return; + return InputInfo(); // If we've collapsed action list that contained CudaHostAction we // need to build jobs for device-side inputs it may have held. if (CollapsedCHA) { - InputInfo II; for (const Action *DA : CollapsedCHA->getDeviceActions()) { - BuildJobsForAction(C, DA, TC, "", AtTopLevel, - /*MultipleArchs*/ false, LinkingOutput, II); - CudaDeviceInputInfos.push_back(II); + CudaDeviceInputInfos.push_back( + BuildJobsForAction(C, DA, TC, "", AtTopLevel, + /*MultipleArchs*/ false, LinkingOutput)); } } @@ -1882,14 +1859,11 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, // Treat dsymutil and verify sub-jobs as being at the top-level too, they // shouldn't get temporary output names. // FIXME: Clean this up. - bool SubJobAtTopLevel = false; - if (AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A))) - SubJobAtTopLevel = true; - - InputInfo II; - BuildJobsForAction(C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, - LinkingOutput, II); - InputInfos.push_back(II); + bool SubJobAtTopLevel = + AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A)); + InputInfos.push_back(BuildJobsForAction(C, Input, TC, BoundArch, + SubJobAtTopLevel, MultipleArchs, + LinkingOutput)); } // Always use the first input as the base input. @@ -1905,12 +1879,13 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, InputInfos.append(CudaDeviceInputInfos.begin(), CudaDeviceInputInfos.end()); // Determine the place to write output to, if any. 
+ InputInfo Result; if (JA->getType() == types::TY_Nothing) - Result = InputInfo(A->getType(), BaseInput); + Result = InputInfo(A, BaseInput); else - Result = InputInfo(GetNamedOutputPath(C, *JA, BaseInput, BoundArch, - AtTopLevel, MultipleArchs), - A->getType(), BaseInput); + Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, + AtTopLevel, MultipleArchs), + BaseInput); if (CCCPrintBindings && !CCGenDiagnostics) { llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"' @@ -1925,6 +1900,7 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, T->ConstructJob(C, *JA, Result, InputInfos, C.getArgsForToolChain(TC, BoundArch), LinkingOutput); } + return Result; } const char *Driver::getDefaultImageName() const { diff --git a/contrib/llvm/tools/clang/lib/Driver/InputInfo.h b/contrib/llvm/tools/clang/lib/Driver/InputInfo.h index b23ba57..0c36e81 100644 --- a/contrib/llvm/tools/clang/lib/Driver/InputInfo.h +++ b/contrib/llvm/tools/clang/lib/Driver/InputInfo.h @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_LIB_DRIVER_INPUTINFO_H #define LLVM_CLANG_LIB_DRIVER_INPUTINFO_H +#include "clang/Driver/Action.h" #include "clang/Driver/Types.h" #include "llvm/Option/Arg.h" #include <cassert> @@ -38,21 +39,36 @@ class InputInfo { const llvm::opt::Arg *InputArg; } Data; Class Kind; + const Action* Act; types::ID Type; const char *BaseInput; + static types::ID GetActionType(const Action *A) { + return A != nullptr ? A->getType() : types::TY_Nothing; + } + public: - InputInfo() {} - InputInfo(types::ID _Type, const char *_BaseInput) - : Kind(Nothing), Type(_Type), BaseInput(_BaseInput) { + InputInfo() : InputInfo(nullptr, nullptr) {} + InputInfo(const Action *A, const char *_BaseInput) + : Kind(Nothing), Act(A), Type(GetActionType(A)), BaseInput(_BaseInput) {} + + InputInfo(types::ID _Type, const char *_Filename, const char *_BaseInput) + : Kind(Filename), Act(nullptr), Type(_Type), BaseInput(_BaseInput) { + Data.Filename = _Filename; } - InputInfo(const char *_Filename, types::ID _Type, const char *_BaseInput) - : Kind(Filename), Type(_Type), BaseInput(_BaseInput) { + InputInfo(const Action *A, const char *_Filename, const char *_BaseInput) + : Kind(Filename), Act(A), Type(GetActionType(A)), BaseInput(_BaseInput) { Data.Filename = _Filename; } - InputInfo(const llvm::opt::Arg *_InputArg, types::ID _Type, + + InputInfo(types::ID _Type, const llvm::opt::Arg *_InputArg, + const char *_BaseInput) + : Kind(InputArg), Act(nullptr), Type(_Type), BaseInput(_BaseInput) { + Data.InputArg = _InputArg; + } + InputInfo(const Action *A, const llvm::opt::Arg *_InputArg, const char *_BaseInput) - : Kind(InputArg), Type(_Type), BaseInput(_BaseInput) { + : Kind(InputArg), Act(A), Type(GetActionType(A)), BaseInput(_BaseInput) { Data.InputArg = _InputArg; } @@ -61,6 +77,9 @@ public: bool isInputArg() const { return Kind == InputArg; } types::ID getType() const { return Type; } const char *getBaseInput() const { return BaseInput; } + /// The action for which this InputInfo was created. May be null. 
+ const Action *getAction() const { return Act; } + void setAction(const Action *A) { Act = A; } const char *getFilename() const { assert(isFilename() && "Invalid accessor."); diff --git a/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp b/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp index b7e576e..6874715 100644 --- a/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp @@ -634,6 +634,96 @@ SanitizerMask MSVCToolChain::getSupportedSanitizers() const { return Res; } +static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL, + bool SupportsForcingFramePointer, + const char *ExpandChar, const OptTable &Opts) { + assert(A->getOption().matches(options::OPT__SLASH_O)); + + StringRef OptStr = A->getValue(); + for (size_t I = 0, E = OptStr.size(); I != E; ++I) { + const char &OptChar = *(OptStr.data() + I); + switch (OptChar) { + default: + break; + case '1': + case '2': + case 'x': + case 'd': + if (&OptChar == ExpandChar) { + if (OptChar == 'd') { + DAL.AddFlagArg(A, Opts.getOption(options::OPT_O0)); + } else { + if (OptChar == '1') { + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "s"); + } else if (OptChar == '2' || OptChar == 'x') { + DAL.AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin)); + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2"); + } + if (SupportsForcingFramePointer) + DAL.AddFlagArg(A, + Opts.getOption(options::OPT_fomit_frame_pointer)); + if (OptChar == '1' || OptChar == '2') + DAL.AddFlagArg(A, + Opts.getOption(options::OPT_ffunction_sections)); + } + } + break; + case 'b': + if (I + 1 != E && isdigit(OptStr[I + 1])) + ++I; + break; + case 'g': + break; + case 'i': + if (I + 1 != E && OptStr[I + 1] == '-') { + ++I; + DAL.AddFlagArg(A, Opts.getOption(options::OPT_fno_builtin)); + } else { + DAL.AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin)); + } + break; + case 's': + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "s"); + break; + case 't': + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2"); + break; + case 'y': { + bool OmitFramePointer = true; + if (I + 1 != E && OptStr[I + 1] == '-') { + OmitFramePointer = false; + ++I; + } + if (SupportsForcingFramePointer) { + if (OmitFramePointer) + DAL.AddFlagArg(A, + Opts.getOption(options::OPT_fomit_frame_pointer)); + else + DAL.AddFlagArg( + A, Opts.getOption(options::OPT_fno_omit_frame_pointer)); + } + break; + } + } + } +} + +static void TranslateDArg(Arg *A, llvm::opt::DerivedArgList &DAL, + const OptTable &Opts) { + assert(A->getOption().matches(options::OPT_D)); + + StringRef Val = A->getValue(); + size_t Hash = Val.find('#'); + if (Hash == StringRef::npos || Hash > Val.find('=')) { + DAL.append(A); + return; + } + + std::string NewVal = Val; + NewVal[Hash] = '='; + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_D), NewVal); +} + llvm::opt::DerivedArgList * MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch) const { @@ -664,81 +754,18 @@ MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, } } - // The -O flag actually takes an amalgam of other options. For example, - // '/Ogyb2' is equivalent to '/Og' '/Oy' '/Ob2'. for (Arg *A : Args) { - if (!A->getOption().matches(options::OPT__SLASH_O)) { + if (A->getOption().matches(options::OPT__SLASH_O)) { + // The -O flag actually takes an amalgam of other options. For example, + // '/Ogyb2' is equivalent to '/Og' '/Oy' '/Ob2'. 
+ TranslateOptArg(A, *DAL, SupportsForcingFramePointer, ExpandChar, Opts); + } else if (A->getOption().matches(options::OPT_D)) { + // Translate -Dfoo#bar into -Dfoo=bar. + TranslateDArg(A, *DAL, Opts); + } else { DAL->append(A); - continue; - } - - StringRef OptStr = A->getValue(); - for (size_t I = 0, E = OptStr.size(); I != E; ++I) { - const char &OptChar = *(OptStr.data() + I); - switch (OptChar) { - default: - break; - case '1': - case '2': - case 'x': - case 'd': - if (&OptChar == ExpandChar) { - if (OptChar == 'd') { - DAL->AddFlagArg(A, Opts.getOption(options::OPT_O0)); - } else { - if (OptChar == '1') { - DAL->AddJoinedArg(A, Opts.getOption(options::OPT_O), "s"); - } else if (OptChar == '2' || OptChar == 'x') { - DAL->AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin)); - DAL->AddJoinedArg(A, Opts.getOption(options::OPT_O), "2"); - } - if (SupportsForcingFramePointer) - DAL->AddFlagArg(A, - Opts.getOption(options::OPT_fomit_frame_pointer)); - if (OptChar == '1' || OptChar == '2') - DAL->AddFlagArg(A, - Opts.getOption(options::OPT_ffunction_sections)); - } - } - break; - case 'b': - if (I + 1 != E && isdigit(OptStr[I + 1])) - ++I; - break; - case 'g': - break; - case 'i': - if (I + 1 != E && OptStr[I + 1] == '-') { - ++I; - DAL->AddFlagArg(A, Opts.getOption(options::OPT_fno_builtin)); - } else { - DAL->AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin)); - } - break; - case 's': - DAL->AddJoinedArg(A, Opts.getOption(options::OPT_O), "s"); - break; - case 't': - DAL->AddJoinedArg(A, Opts.getOption(options::OPT_O), "2"); - break; - case 'y': { - bool OmitFramePointer = true; - if (I + 1 != E && OptStr[I + 1] == '-') { - OmitFramePointer = false; - ++I; - } - if (SupportsForcingFramePointer) { - if (OmitFramePointer) - DAL->AddFlagArg(A, - Opts.getOption(options::OPT_fomit_frame_pointer)); - else - DAL->AddFlagArg( - A, Opts.getOption(options::OPT_fno_omit_frame_pointer)); - } - break; - } - } } } + return DAL; } diff --git a/contrib/llvm/tools/clang/lib/Driver/MinGWToolChain.cpp b/contrib/llvm/tools/clang/lib/Driver/MinGWToolChain.cpp index c5287bb..938440b 100644 --- a/contrib/llvm/tools/clang/lib/Driver/MinGWToolChain.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/MinGWToolChain.cpp @@ -66,17 +66,23 @@ MinGW::MinGW(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : ToolChain(D, Triple, Args) { getProgramPaths().push_back(getDriver().getInstalledDir()); - // On Windows if there is no sysroot we search for gcc on the PATH. - if (getDriver().SysRoot.size()) - Base = getDriver().SysRoot; +// On Windows there are no standard install locations, so we search +// for gcc on the PATH. On Linux the base is always /usr.
#ifdef LLVM_ON_WIN32 + if (getDriver().SysRoot.size()) + Base = getDriver().SysRoot; else if (llvm::ErrorOr<std::string> GPPName = llvm::sys::findProgramByName("gcc")) Base = llvm::sys::path::parent_path( llvm::sys::path::parent_path(GPPName.get())); -#endif - if (!Base.size()) + else Base = llvm::sys::path::parent_path(getDriver().getInstalledDir()); +#else + if (getDriver().SysRoot.size()) + Base = getDriver().SysRoot; + else + Base = "/usr"; +#endif Base += llvm::sys::path::get_separator(); findGccLibDir(); diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp index 7ece321..beede2e 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp @@ -526,7 +526,7 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const { // no environment variable defined, see if we can set the default based // on -isysroot. if (OSXTarget.empty() && iOSTarget.empty() && WatchOSTarget.empty() && - Args.hasArg(options::OPT_isysroot)) { + TvOSTarget.empty() && Args.hasArg(options::OPT_isysroot)) { if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) { StringRef isysroot = A->getValue(); // Assume SDK has path: SOME_PATH/SDKs/PlatformXX.YY.sdk @@ -2716,13 +2716,8 @@ const StringRef HexagonToolChain::GetDefaultCPU() { const StringRef HexagonToolChain::GetTargetCPUVersion(const ArgList &Args) { Arg *CpuArg = nullptr; - - for (auto &A : Args) { - if (A->getOption().matches(options::OPT_mcpu_EQ)) { - CpuArg = A; - A->claim(); - } - } + if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ, options::OPT_march_EQ)) + CpuArg = A; StringRef CPU = CpuArg ? CpuArg->getValue() : GetDefaultCPU(); if (CPU.startswith("hexagon")) diff --git a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp index 8468105..5a2dbd3 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp @@ -2069,6 +2069,16 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args, CmdArgs.push_back("-machine-sink-split=0"); } +void Clang::AddWebAssemblyTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + // Default to "hidden" visibility. + if (!Args.hasArg(options::OPT_fvisibility_EQ, + options::OPT_fvisibility_ms_compat)) { + CmdArgs.push_back("-fvisibility"); + CmdArgs.push_back("hidden"); + } +} + // Decode AArch64 features from string like +[no]featureA+[no]featureB+... static bool DecodeAArch64Features(const Driver &D, StringRef text, std::vector<const char *> &Features) { @@ -2970,7 +2980,7 @@ static void SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T, ExtractArgs.push_back(OutFile); const char *Exec = Args.MakeArgString(TC.GetProgramPath("objcopy")); - InputInfo II(Output.getFilename(), types::TY_Object, Output.getFilename()); + InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename()); // First extract the dwo sections. C.addCommand(llvm::make_unique<Command>(JA, T, Exec, ExtractArgs, II)); @@ -3253,8 +3263,9 @@ ParsePICArgs(const ToolChain &ToolChain, const llvm::Triple &Triple, // ToolChain.getTriple() and Triple? bool PIE = ToolChain.isPIEDefault(); bool PIC = PIE || ToolChain.isPICDefault(); - // The Darwin default to use PIC does not apply when using -static. - if (ToolChain.getTriple().isOSDarwin() && Args.hasArg(options::OPT_static)) + // The Darwin/MachO default to use PIC does not apply when using -static. 
+ if (ToolChain.getTriple().isOSBinFormatMachO() && + Args.hasArg(options::OPT_static)) PIE = PIC = false; bool IsPICLevelTwo = PIC; @@ -4015,6 +4026,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, case llvm::Triple::hexagon: AddHexagonTargetArgs(Args, CmdArgs); break; + + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + AddWebAssemblyTargetArgs(Args, CmdArgs); + break; } // The 'g' groups options involve a somewhat intricate sequence of decisions @@ -4176,8 +4192,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-generate-type-units"); } - // CloudABI uses -ffunction-sections and -fdata-sections by default. - bool UseSeparateSections = Triple.getOS() == llvm::Triple::CloudABI; + // CloudABI and WebAssembly use -ffunction-sections and -fdata-sections by + // default. + bool UseSeparateSections = Triple.getOS() == llvm::Triple::CloudABI || + Triple.getArch() == llvm::Triple::wasm32 || + Triple.getArch() == llvm::Triple::wasm64; if (Args.hasFlag(options::OPT_ffunction_sections, options::OPT_fno_function_sections, UseSeparateSections)) { @@ -6040,8 +6059,7 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, // doesn't handle that so rather than warning about unused flags that are // actually used, we'll lie by omission instead. // FIXME: Stop lying and consume only the appropriate driver flags - for (const Arg *A : Args.filtered(options::OPT_W_Group)) - A->claim(); + Args.ClaimAllArgs(options::OPT_W_Group); CollectArgsForIntegratedAssembler(C, Args, CmdArgs, getToolChain().getDriver()); @@ -6078,6 +6096,12 @@ void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, for (const auto &A : Args) { if (forwardToGCC(A->getOption())) { + // It is unfortunate that we have to claim here, as this means + // we will basically never report anything interesting for + // platforms using a generic gcc, even if we are just using gcc + // to get to the assembler. + A->claim(); + // Don't forward any -g arguments to assembly steps. if (isa<AssembleJobAction>(JA) && A->getOption().matches(options::OPT_g_Group)) @@ -6088,11 +6112,6 @@ void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, A->getOption().matches(options::OPT_W_Group)) continue; - // It is unfortunate that we have to claim here, as this means - // we will basically never report anything interesting for - // platforms using a generic gcc, even if we are just using gcc - // to get to the assembler. - A->claim(); A->render(Args, CmdArgs); } } @@ -6502,10 +6521,6 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, std::string Linker = getToolChain().GetProgramPath(getShortName()); ArgStringList CmdArgs; - CmdArgs.push_back("-flavor"); - CmdArgs.push_back("old-gnu"); - CmdArgs.push_back("-target"); - CmdArgs.push_back(Args.MakeArgString(getToolChain().getTripleString())); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); @@ -6534,6 +6549,14 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, ArgStringList CmdArgs; CmdArgs.push_back("-flavor"); CmdArgs.push_back("ld"); + + // Enable garbage collection of unused input sections by default, since code + // size is of particular importance. This is significantly facilitated by + // the enabling of -ffunction-sections and -fdata-sections in + // Clang::ConstructJob. 
+ if (areOptimizationsEnabled(Args)) + CmdArgs.push_back("--gc-sections"); + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); @@ -8965,7 +8988,7 @@ void nacltools::AssemblerARM::ConstructJob(Compilation &C, const JobAction &JA, const char *LinkingOutput) const { const toolchains::NaClToolChain &ToolChain = static_cast<const toolchains::NaClToolChain &>(getToolChain()); - InputInfo NaClMacros(ToolChain.GetNaClArmMacrosPath(), types::TY_PP_Asm, + InputInfo NaClMacros(types::TY_PP_Asm, ToolChain.GetNaClArmMacrosPath(), "nacl-arm-macros.s"); InputInfoList NewInputs; NewInputs.push_back(NaClMacros); diff --git a/contrib/llvm/tools/clang/lib/Driver/Tools.h b/contrib/llvm/tools/clang/lib/Driver/Tools.h index 168662f..2b137f4 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Tools.h +++ b/contrib/llvm/tools/clang/lib/Driver/Tools.h @@ -82,6 +82,8 @@ private: llvm::opt::ArgStringList &CmdArgs) const; void AddHexagonTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + void AddWebAssemblyTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; enum RewriteKind { RK_None, RK_Fragile, RK_NonFragile }; @@ -238,7 +240,7 @@ namespace amdgpu { class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool { public: - Linker(const ToolChain &TC) : GnuTool("amdgpu::Linker", "lld", TC) {} + Linker(const ToolChain &TC) : GnuTool("amdgpu::Linker", "ld.lld", TC) {} bool isLinkJob() const override { return true; } bool hasIntegratedCPP() const override { return false; } void ConstructJob(Compilation &C, const JobAction &JA, diff --git a/contrib/llvm/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp b/contrib/llvm/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp index 9f71168..482c0f6 100644 --- a/contrib/llvm/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp +++ b/contrib/llvm/tools/clang/lib/Edit/RewriteObjCFoundationAPI.cpp @@ -1077,6 +1077,9 @@ static bool rewriteToNumericBoxedExpression(const ObjCMessageExpr *Msg, case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLEvent: return false; + + case CK_BooleanToSignedIntegral: + llvm_unreachable("OpenCL-specific cast in Objective-C?"); } } diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp index 8faab28..1118335 100644 --- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp +++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp @@ -150,7 +150,12 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) return true; if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || - (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName)) || + (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) && + // FIXME: This is a temporary workaround for the case where clang-format + // sets BreakBeforeParameter to avoid bin packing and this creates a + // completely unnecessary line break after a template type that isn't + // line-wrapped. 
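The FIXME above concerns how mustBreak() interacts with BinPackParameters; decisions like this can be reproduced outside the test suite through the public entry point. A minimal driver, assuming the clang-format API as of this import (error handling elided):

    #include "clang/Format/Format.h"
    #include "clang/Tooling/Core/Replacement.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      clang::format::FormatStyle Style = clang::format::getLLVMStyle();
      Style.BinPackParameters = false; // one of the knobs mustBreak() consults
      llvm::StringRef Code =
          "template <typename LongName> LongName f(LongName a, LongName b);\n";
      clang::tooling::Replacements Rs = clang::format::reformat(
          Style, Code, {clang::tooling::Range(0, Code.size())});
      llvm::outs() << clang::tooling::applyAllReplacements(Code, Rs);
    }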
+ (Previous.NestingLevel == 1 || Style.BinPackParameters)) || (Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) && Previous.isNot(tok::question)) || (!Style.BreakBeforeTernaryOperators && @@ -177,13 +182,15 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { return true; unsigned NewLineColumn = getNewLineColumn(State); - if (State.Column <= NewLineColumn) - return false; - if (Current.isMemberAccess() && - State.Column + getLengthToNextOperator(Current) > Style.ColumnLimit) + State.Column + getLengthToNextOperator(Current) > Style.ColumnLimit && + (State.Column > NewLineColumn || + Current.NestingLevel < State.StartOfLineLevel)) return true; + if (State.Column <= NewLineColumn) + return false; + if (Style.AlwaysBreakBeforeMultilineStrings && (NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth || Previous.is(tok::comma) || Current.NestingLevel < 2) && @@ -383,7 +390,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, State.Stack.back().LastSpace = State.Column; State.Stack.back().NestedBlockIndent = State.Column; } else if (!Current.isOneOf(tok::comment, tok::caret) && - (Previous.is(tok::comma) || + ((Previous.is(tok::comma) && + !Previous.is(TT_OverloadedOperator)) || (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) { State.Stack.back().LastSpace = State.Column; } else if ((Previous.isOneOf(TT_BinaryOperator, TT_ConditionalExpr, @@ -860,7 +868,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, (!SkipFirstExtraIndent && *I > prec::Assignment && !Current.isTrailingComment())) NewParenState.Indent += Style.ContinuationIndentWidth; - if ((Previous && !Previous->opensScope()) || *I > prec::Comma) + if ((Previous && !Previous->opensScope()) || *I != prec::Comma) NewParenState.BreakBeforeParameter = false; State.Stack.push_back(NewParenState); SkipFirstExtraIndent = false; @@ -906,8 +914,12 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth; } const FormatToken *NextNoComment = Current.getNextNonComment(); + bool EndsInComma = Current.MatchingParen && + Current.MatchingParen->Previous && + Current.MatchingParen->Previous->is(tok::comma); AvoidBinPacking = - Current.isOneOf(TT_ArrayInitializerLSquare, TT_DictLiteral) || + (Current.is(TT_ArrayInitializerLSquare) && EndsInComma) || + Current.is(TT_DictLiteral) || Style.Language == FormatStyle::LK_Proto || !Style.BinPackArguments || (NextNoComment && NextNoComment->is(TT_DesignatedInitializerPeriod)); if (Current.ParameterCount > 1) diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp index 5068fca..2689368 100644 --- a/contrib/llvm/tools/clang/lib/Format/Format.cpp +++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp @@ -583,6 +583,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.BreakBeforeTernaryOperators = false; + GoogleStyle.CommentPragmas = "@(export|visibility) {"; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.SpacesInContainerLiterals = false; } else if (Language == FormatStyle::LK_Proto) { @@ -1238,6 +1239,8 @@ private: FormatTok->Type = TT_ImplicitStringLiteral; break; } + if (FormatTok->Type == TT_ImplicitStringLiteral) + break; } if (FormatTok->is(TT_ImplicitStringLiteral)) @@ -1901,8 +1904,9 @@ 
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), new DiagnosticOptions); SourceManager SourceMgr(Diagnostics, Files); - InMemoryFileSystem->addFile(FileName, 0, - llvm::MemoryBuffer::getMemBuffer(Code, FileName)); + InMemoryFileSystem->addFile( + FileName, 0, llvm::MemoryBuffer::getMemBuffer( + Code, FileName, /*RequiresNullTerminator=*/false)); FileID ID = SourceMgr.createFileID(Files.getFile(FileName), SourceLocation(), clang::SrcMgr::C_User); SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp index caff131..8fbb43b 100644 --- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp +++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp @@ -119,7 +119,9 @@ private: } } - if (Left->Previous && + if (Left->is(TT_OverloadedOperatorLParen)) { + Contexts.back().IsExpression = false; + } else if (Left->Previous && (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype, tok::kw_if, tok::kw_while, tok::l_paren, tok::comma) || @@ -132,9 +134,7 @@ private: // This is a parameter list of a lambda expression. Contexts.back().IsExpression = false; } else if (Line.InPPDirective && - (!Left->Previous || - !Left->Previous->isOneOf(tok::identifier, - TT_OverloadedOperator))) { + (!Left->Previous || !Left->Previous->is(tok::identifier))) { Contexts.back().IsExpression = true; } else if (Contexts[Contexts.size() - 2].CaretFound) { // This is the parameter list of an ObjC block. @@ -199,6 +199,18 @@ private: Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; + if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) && + Left->Previous && Left->Previous->is(tok::l_paren)) { + // Detect the case where macros are used to generate lambdas or + // function bodies, e.g.: + // auto my_lambda = MACRO((Type *type, int i) { .. body ..
}); + for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) { + if (Tok->is(TT_BinaryOperator) && + Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) + Tok->Type = TT_PointerOrReference; + } + } + if (StartsObjCMethodExpr) { CurrentToken->Type = TT_ObjCMethodExpr; if (Contexts.back().FirstObjCSelectorName) { @@ -568,7 +580,8 @@ private: if (CurrentToken->isOneOf(tok::star, tok::amp)) CurrentToken->Type = TT_PointerOrReference; consumeToken(); - if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator)) + if (CurrentToken && + CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma)) CurrentToken->Previous->Type = TT_OverloadedOperator; } if (CurrentToken) { @@ -1713,7 +1726,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma)) return 100; if (Left.is(TT_JsTypeColon)) - return 100; + return 35; } if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next && @@ -2058,14 +2071,14 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) || Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) return true; + if (Right.is(TT_OverloadedOperatorLParen)) + return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; if (Left.is(tok::comma)) return true; if (Right.is(tok::comma)) return false; if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen)) return true; - if (Right.is(TT_OverloadedOperatorLParen)) - return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; if (Right.is(tok::colon)) { if (Line.First->isOneOf(tok::kw_case, tok::kw_default) || !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi)) diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp index 94b8498..7b8f6e6 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp @@ -315,6 +315,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { // definitions, too. unsigned StoredPosition = Tokens->getPosition(); FormatToken *Tok = FormatTok; + const FormatToken *PrevTok = getPreviousToken(); // Keep a stack of positions of lbrace tokens. We will // update information about whether an lbrace starts a // braced init list or a different block during the loop. @@ -331,47 +332,53 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { switch (Tok->Tok.getKind()) { case tok::l_brace: - Tok->BlockKind = BK_Unknown; + if (Style.Language == FormatStyle::LK_JavaScript && PrevTok && + PrevTok->is(tok::colon)) + // In TypeScript's TypeMemberLists, there can be semicolons between the + // individual members. + Tok->BlockKind = BK_BracedInit; + else + Tok->BlockKind = BK_Unknown; LBraceStack.push_back(Tok); break; case tok::r_brace: - if (!LBraceStack.empty()) { - if (LBraceStack.back()->BlockKind == BK_Unknown) { - bool ProbablyBracedList = false; - if (Style.Language == FormatStyle::LK_Proto) { - ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); - } else { - // Using OriginalColumn to distinguish between ObjC methods and - // binary operators is a bit hacky. - bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && - NextTok->OriginalColumn == 0; - - // If there is a comma, semicolon or right paren after the closing - // brace, we assume this is a braced initializer list. 
Note that - // regardless how we mark inner braces here, we will overwrite the - // BlockKind later if we parse a braced list (where all blocks - // inside are by default braced lists), or when we explicitly detect - // blocks (for example while parsing lambdas). - // - // We exclude + and - as they can be ObjC visibility modifiers. - ProbablyBracedList = - NextTok->isOneOf(tok::comma, tok::period, tok::colon, - tok::r_paren, tok::r_square, tok::l_brace, - tok::l_square, tok::l_paren, tok::ellipsis) || - (NextTok->is(tok::semi) && - (!ExpectClassBody || LBraceStack.size() != 1)) || - (NextTok->isBinaryOperator() && !NextIsObjCMethod); - } - if (ProbablyBracedList) { - Tok->BlockKind = BK_BracedInit; - LBraceStack.back()->BlockKind = BK_BracedInit; - } else { - Tok->BlockKind = BK_Block; - LBraceStack.back()->BlockKind = BK_Block; - } + if (LBraceStack.empty()) + break; + if (LBraceStack.back()->BlockKind == BK_Unknown) { + bool ProbablyBracedList = false; + if (Style.Language == FormatStyle::LK_Proto) { + ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); + } else { + // Using OriginalColumn to distinguish between ObjC methods and + // binary operators is a bit hacky. + bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && + NextTok->OriginalColumn == 0; + + // If there is a comma, semicolon or right paren after the closing + // brace, we assume this is a braced initializer list. Note that + // regardless how we mark inner braces here, we will overwrite the + // BlockKind later if we parse a braced list (where all blocks + // inside are by default braced lists), or when we explicitly detect + // blocks (for example while parsing lambdas). + // + // We exclude + and - as they can be ObjC visibility modifiers. + ProbablyBracedList = + NextTok->isOneOf(tok::comma, tok::period, tok::colon, + tok::r_paren, tok::r_square, tok::l_brace, + tok::l_square, tok::l_paren, tok::ellipsis) || + (NextTok->is(tok::semi) && + (!ExpectClassBody || LBraceStack.size() != 1)) || + (NextTok->isBinaryOperator() && !NextIsObjCMethod); + } + if (ProbablyBracedList) { + Tok->BlockKind = BK_BracedInit; + LBraceStack.back()->BlockKind = BK_BracedInit; + } else { + Tok->BlockKind = BK_Block; + LBraceStack.back()->BlockKind = BK_Block; } - LBraceStack.pop_back(); } + LBraceStack.pop_back(); break; case tok::at: case tok::semi: @@ -381,14 +388,16 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { case tok::kw_switch: case tok::kw_try: case tok::kw___try: - if (!LBraceStack.empty()) + if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) LBraceStack.back()->BlockKind = BK_Block; break; default: break; } + PrevTok = Tok; Tok = NextTok; } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); + // Assume other blocks for all unclosed opening braces. for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { if (LBraceStack[i]->BlockKind == BK_Unknown) @@ -841,6 +850,8 @@ void UnwrappedLineParser::parseStructuralElement() { // This does not apply for Java and JavaScript. if (Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript) { + if (FormatTok->is(tok::semi)) + nextToken(); addUnwrappedLine(); return; } @@ -986,13 +997,11 @@ bool UnwrappedLineParser::tryToParseLambda() { nextToken(); return false; } - // FIXME: This is a dirty way to access the previous token. Find a better - // solution. 
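The TypeScript handling above treats a brace after a colon as a braced initializer because type member lists separate members with semicolons. A sketch of an input that exercises it, fed through the same public API as in the earlier sketch:

    llvm::StringRef Code = "let x: {a: string; b: number;} = {a: 'a', b: 1};\n";
    clang::format::FormatStyle JS =
        clang::format::getGoogleStyle(clang::format::FormatStyle::LK_JavaScript);
    clang::tooling::Replacements Rs = clang::format::reformat(
        JS, Code, {clang::tooling::Range(0, Code.size())});
    // Without the colon/semicolon special cases, the semicolons inside the
    // type literal would previously trip the brace classifier's error recovery.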
- if (!Line->Tokens.empty() && - (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator, - tok::kw_new, tok::kw_delete) || - Line->Tokens.back().Tok->closesScope() || - Line->Tokens.back().Tok->isSimpleTypeSpecifier())) { + const FormatToken* Previous = getPreviousToken(); + if (Previous && + (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, + tok::kw_delete) || + Previous->closesScope() || Previous->isSimpleTypeSpecifier())) { nextToken(); return false; } @@ -1174,6 +1183,14 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { nextToken(); return !HasError; case tok::semi: + // JavaScript (or more precisely TypeScript) can have semicolons in braced + // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be + // used for error recovery if we have otherwise determined that this is + // a braced list. + if (Style.Language == FormatStyle::LK_JavaScript) { + nextToken(); + break; + } HasError = true; if (!ContinueOnSemicolons) return !HasError; @@ -1792,18 +1809,22 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { return; } + // Consume the "abstract" in "export abstract class". + if (FormatTok->is(Keywords.kw_abstract)) + nextToken(); + if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum, - Keywords.kw_let, Keywords.kw_var)) + Keywords.kw_interface, Keywords.kw_let, + Keywords.kw_var)) return; // Fall through to parsing the corresponding structure. - if (FormatTok->is(tok::l_brace)) { - FormatTok->BlockKind = BK_Block; - parseBracedList(); - } - - while (!eof() && FormatTok->isNot(tok::semi) && - FormatTok->isNot(tok::l_brace)) { - nextToken(); + while (!eof() && FormatTok->isNot(tok::semi)) { + if (FormatTok->is(tok::l_brace)) { + FormatTok->BlockKind = BK_Block; + parseBracedList(); + } else { + nextToken(); + } } } @@ -1877,6 +1898,14 @@ void UnwrappedLineParser::nextToken() { readToken(); } +const FormatToken *UnwrappedLineParser::getPreviousToken() { + // FIXME: This is a dirty way to access the previous token. Find a better + // solution. 
+ if (!Line || Line->Tokens.empty()) + return nullptr; + return Line->Tokens.back().Tok; +} + void UnwrappedLineParser::readToken() { bool CommentsInCurrentLine = true; do { diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h index a13c03f..6d40ab4 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h @@ -110,6 +110,7 @@ private: void addUnwrappedLine(); bool eof() const; void nextToken(); + const FormatToken *getPreviousToken(); void readToken(); void flushComments(bool NewlineBeforeNext); void pushToken(FormatToken *Tok); diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp index 725f05b..d6e6ed2 100644 --- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp +++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp @@ -30,7 +30,7 @@ WhitespaceManager::Change::Change( unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective, - bool IsStartOfDeclName) + bool IsStartOfDeclName, bool IsInsideToken) : CreateReplacement(CreateReplacement), OriginalWhitespaceRange(OriginalWhitespaceRange), StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), @@ -38,8 +38,8 @@ WhitespaceManager::Change::Change( CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), ContinuesPPDirective(ContinuesPPDirective), IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel), - Spaces(Spaces), IsTrailingComment(false), TokenLength(0), - PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), + Spaces(Spaces), IsInsideToken(IsInsideToken), IsTrailingComment(false), + TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), StartOfBlockComment(nullptr), IndentationOffset(0) {} void WhitespaceManager::reset() { @@ -55,20 +55,23 @@ void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, return; Tok.Decision = (Newlines > 0) ? 
FD_Break : FD_Continue; Changes.push_back( - Change(true, Tok.WhitespaceRange, IndentLevel, Spaces, StartOfTokenColumn, - Newlines, "", "", Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); + Change(/*CreateReplacement=*/true, Tok.WhitespaceRange, IndentLevel, + Spaces, StartOfTokenColumn, Newlines, "", "", Tok.Tok.getKind(), + InPPDirective && !Tok.IsFirst, + Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), + /*IsInsideToken=*/false)); } void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { if (Tok.Finalized) return; - Changes.push_back( - Change(false, Tok.WhitespaceRange, /*IndentLevel=*/0, - /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", - Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); + Changes.push_back(Change( + /*CreateReplacement=*/false, Tok.WhitespaceRange, /*IndentLevel=*/0, + /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", + Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, + Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), + /*IsInsideToken=*/false)); } void WhitespaceManager::replaceWhitespaceInToken( @@ -81,15 +84,10 @@ void WhitespaceManager::replaceWhitespaceInToken( Changes.push_back(Change( true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix, - CurrentPrefix, - // If we don't add a newline this change doesn't start a comment. Thus, - // when we align line comments, we don't need to treat this change as one. - // FIXME: We still need to take this change in account to properly - // calculate the new length of the comment and to calculate the changes - // for which to do the alignment when aligning comments. - Tok.is(TT_LineComment) && Newlines > 0 ? tok::comment : tok::unknown, + CurrentPrefix, Tok.is(TT_LineComment) ? tok::comment : tok::unknown, InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName))); + Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), + /*IsInsideToken=*/Newlines == 0)); } const tooling::Replacements &WhitespaceManager::generateReplacements() { @@ -109,6 +107,7 @@ const tooling::Replacements &WhitespaceManager::generateReplacements() { void WhitespaceManager::calculateLineBreakInformation() { Changes[0].PreviousEndOfTokenColumn = 0; + Change *LastOutsideTokenChange = &Changes[0]; for (unsigned i = 1, e = Changes.size(); i != e; ++i) { unsigned OriginalWhitespaceStart = SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin()); @@ -119,11 +118,20 @@ void WhitespaceManager::calculateLineBreakInformation() { Changes[i].PreviousLinePostfix.size() + Changes[i - 1].CurrentLinePrefix.size(); + // If there are multiple changes in this token, sum up all the changes until + // the end of the line. 
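A simplified restatement of the bookkeeping that follows: a change flagged as inside a token donates its length (and spaces) to the last change that started outside a token, so a token split across several changes measures as one unit.

    #include <vector>
    struct Change { bool IsInsideToken; int TokenLength; int Spaces; };
    static void foldInsideTokenLengths(std::vector<Change> &Changes) {
      if (Changes.empty())
        return;
      Change *LastOutside = &Changes[0];
      for (size_t i = 1; i < Changes.size(); ++i) {
        if (Changes[i - 1].IsInsideToken)
          LastOutside->TokenLength +=
              Changes[i - 1].TokenLength + Changes[i - 1].Spaces;
        else
          LastOutside = &Changes[i - 1];
      }
    }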
+ if (Changes[i - 1].IsInsideToken) + LastOutsideTokenChange->TokenLength += + Changes[i - 1].TokenLength + Changes[i - 1].Spaces; + else + LastOutsideTokenChange = &Changes[i - 1]; + Changes[i].PreviousEndOfTokenColumn = Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength; Changes[i - 1].IsTrailingComment = - (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof) && + (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof || + (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) && Changes[i - 1].Kind == tok::comment; } // FIXME: The last token is currently not always an eof token; in those @@ -133,6 +141,10 @@ void WhitespaceManager::calculateLineBreakInformation() { const WhitespaceManager::Change *LastBlockComment = nullptr; for (auto &Change : Changes) { + // Reset the IsTrailingComment flag for changes inside of trailing comments + // so they don't get realigned later. + if (Change.IsInsideToken) + Change.IsTrailingComment = false; Change.StartOfBlockComment = nullptr; Change.IndentationOffset = 0; if (Change.Kind == tok::comment) { @@ -342,6 +354,12 @@ void WhitespaceManager::alignTrailingComments() { unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength; + + // If we don't create a replacement for this change, we have to consider + // it to be immovable. + if (!Changes[i].CreateReplacement) + ChangeMaxColumn = ChangeMinColumn; + if (i + 1 != e && Changes[i + 1].ContinuesPPDirective) ChangeMaxColumn -= 2; // If this comment follows an } in column 0, it probably documents the diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h index f83971b..9ca9db6 100644 --- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h +++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h @@ -109,7 +109,8 @@ public: unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, tok::TokenKind Kind, - bool ContinuesPPDirective, bool IsStartOfDeclName); + bool ContinuesPPDirective, bool IsStartOfDeclName, + bool IsInsideToken); bool CreateReplacement; // Changes might be in the middle of a token, so we cannot just keep the @@ -139,6 +140,10 @@ public: // comments. Uncompensated negative offset is truncated to 0. int Spaces; + // If this change is inside of a token but not at the start of the token or + // directly after a newline. + bool IsInsideToken; + // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and // \c EscapedNewlineColumn will be calculated in // \c calculateLineBreakInformation. diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp index 3a32f47..237a447 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp @@ -1,4 +1,4 @@ -//===--- +//===--- CompilerInvocation.cpp -------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -2202,8 +2202,11 @@ std::string CompilerInvocation::getModuleHash() const { code = hash_combine(code, I->first, I->second); } - // Extend the signature with the sysroot. - code = hash_combine(code, hsOpts.Sysroot, hsOpts.UseBuiltinIncludes, + // Extend the signature with the sysroot and other header search options. 
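llvm::hash_combine folds an arbitrary number of hashable values into one hash_code, which is why widening the module signature is a matter of listing more fields. A standalone illustration (field names mirror the options hashed below; sketch only):

    #include "llvm/ADT/Hashing.h"
    #include "llvm/ADT/StringRef.h"
    static llvm::hash_code hashHeaderSearchBits(llvm::StringRef Sysroot,
                                                llvm::StringRef ModuleFormat,
                                                bool UseDebugInfo,
                                                bool UseBuiltinIncludes) {
      return llvm::hash_combine(Sysroot, ModuleFormat, UseDebugInfo,
                                UseBuiltinIncludes);
    }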
+ code = hash_combine(code, hsOpts.Sysroot, + hsOpts.ModuleFormat, + hsOpts.UseDebugInfo, + hsOpts.UseBuiltinIncludes, hsOpts.UseStandardSystemIncludes, hsOpts.UseStandardCXXIncludes, hsOpts.UseLibcxx); diff --git a/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp b/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp index d6c88d2..407ccea 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp @@ -187,15 +187,17 @@ collectModuleHeaderIncludes(const LangOptions &LangOpts, FileManager &FileMgr, return std::error_code(); // Add includes for each of these headers. - for (Module::Header &H : Module->Headers[Module::HK_Normal]) { - Module->addTopHeader(H.Entry); - // Use the path as specified in the module map file. We'll look for this - // file relative to the module build directory (the directory containing - // the module map file) so this will find the same file that we found - // while parsing the module map. - if (std::error_code Err = addHeaderInclude(H.NameAsWritten, Includes, - LangOpts, Module->IsExternC)) - return Err; + for (auto HK : {Module::HK_Normal, Module::HK_Private}) { + for (Module::Header &H : Module->Headers[HK]) { + Module->addTopHeader(H.Entry); + // Use the path as specified in the module map file. We'll look for this + // file relative to the module build directory (the directory containing + // the module map file) so this will find the same file that we found + // while parsing the module map. + if (std::error_code Err = addHeaderInclude(H.NameAsWritten, Includes, + LangOpts, Module->IsExternC)) + return Err; + } } // Note that Module->PrivateHeaders will not be a TopHeader. diff --git a/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp b/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp index 12c8524..f8b73e9 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp @@ -119,6 +119,7 @@ public: const FunctionDecl *Delete) override; void CompletedImplicitDefinition(const FunctionDecl *D) override; void StaticDataMemberInstantiated(const VarDecl *D) override; + void DefaultArgumentInstantiated(const ParmVarDecl *D) override; void AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) override; void FunctionDefinitionInstantiated(const FunctionDecl *D) override; @@ -193,6 +194,11 @@ void MultiplexASTMutationListener::StaticDataMemberInstantiated( for (size_t i = 0, e = Listeners.size(); i != e; ++i) Listeners[i]->StaticDataMemberInstantiated(D); } +void MultiplexASTMutationListener::DefaultArgumentInstantiated( + const ParmVarDecl *D) { + for (size_t i = 0, e = Listeners.size(); i != e; ++i) + Listeners[i]->DefaultArgumentInstantiated(D); +} void MultiplexASTMutationListener::AddedObjCCategoryToInterface( const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) { diff --git a/contrib/llvm/tools/clang/lib/Headers/altivec.h b/contrib/llvm/tools/clang/lib/Headers/altivec.h index dc0dcbc..a5b4f74 100644 --- a/contrib/llvm/tools/clang/lib/Headers/altivec.h +++ b/contrib/llvm/tools/clang/lib/Headers/altivec.h @@ -1891,6 +1891,22 @@ static vector float __ATTRS_o_ai vec_ctf(vector unsigned int __a, int __b) { return __builtin_altivec_vcfux((vector int)__a, __b); } +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_ctf(vector unsigned long long __a, + int __b) { + vector double __ret = __builtin_convertvector(__a, vector double); + __ret *= 
(vector double)(vector unsigned long long)((0x3ffULL - __b) << 52); + return __ret; +} + +static vector double __ATTRS_o_ai vec_ctf(vector signed long long __a, + int __b) { + vector double __ret = __builtin_convertvector(__a, vector double); + __ret *= (vector double)(vector unsigned long long)((0x3ffULL - __b) << 52); + return __ret; +} +#endif + /* vec_vcfsx */ static vector float __attribute__((__always_inline__)) @@ -1907,11 +1923,18 @@ vec_vcfux(vector unsigned int __a, int __b) { /* vec_cts */ -static vector int __attribute__((__always_inline__)) -vec_cts(vector float __a, int __b) { +static vector int __ATTRS_o_ai vec_cts(vector float __a, int __b) { return __builtin_altivec_vctsxs(__a, __b); } +#ifdef __VSX__ +static vector signed long long __ATTRS_o_ai vec_cts(vector double __a, + int __b) { + __a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52); + return __builtin_convertvector(__a, vector signed long long); +} +#endif + /* vec_vctsxs */ static vector int __attribute__((__always_inline__)) @@ -1921,11 +1944,18 @@ vec_vctsxs(vector float __a, int __b) { /* vec_ctu */ -static vector unsigned int __attribute__((__always_inline__)) -vec_ctu(vector float __a, int __b) { +static vector unsigned int __ATTRS_o_ai vec_ctu(vector float __a, int __b) { return __builtin_altivec_vctuxs(__a, __b); } +#ifdef __VSX__ +static vector unsigned long long __ATTRS_o_ai vec_ctu(vector double __a, + int __b) { + __a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52); + return __builtin_convertvector(__a, vector unsigned long long); +} +#endif + /* vec_vctuxs */ static vector unsigned int __attribute__((__always_inline__)) diff --git a/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp b/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp index 8a686a7..2d005dd 100644 --- a/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp @@ -153,8 +153,7 @@ std::string HeaderSearch::getModuleFileName(StringRef ModuleName, auto FileName = llvm::sys::path::filename(ModuleMapPath); llvm::hash_code Hash = - llvm::hash_combine(DirName.lower(), FileName.lower(), - HSOpts->ModuleFormat, HSOpts->UseDebugInfo); + llvm::hash_combine(DirName.lower(), FileName.lower()); SmallString<128> HashStr; llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36); diff --git a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp index 1e7858a..5b1c493 100644 --- a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp @@ -983,6 +983,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { /// u' c-char-sequence ' /// U' c-char-sequence ' /// L' c-char-sequence ' +/// u8' c-char-sequence ' [C++1z lex.ccon] /// c-char-sequence: /// c-char /// c-char-sequence c-char diff --git a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp index 3134790..afb41a2 100644 --- a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp @@ -876,6 +876,22 @@ struct PragmaDebugHandler : public PragmaHandler { Crasher.setKind(tok::annot_pragma_parser_crash); Crasher.setAnnotationRange(SourceRange(Tok.getLocation())); PP.EnterToken(Crasher); + } else if (II->isStr("dump")) { + Token Identifier; + PP.LexUnexpandedToken(Identifier); + if (auto *DumpII = Identifier.getIdentifierInfo()) { + Token DumpAnnot; + DumpAnnot.startToken(); + DumpAnnot.setKind(tok::annot_pragma_dump); 
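On the VSX conversions above: (0x3ffULL - __b) << 52 builds the IEEE-754 double 2^-__b directly, since a double with zero mantissa and biased exponent e encodes 2^(e-1023); multiplying by it applies the fixed-point scaling without a division (vec_cts/vec_ctu use 0x3ffULL + __b for 2^__b). A scalar sketch of the same trick:

    #include <cstring>
    static double powerOfTwo(int NegExp) {          // returns 2^(-NegExp)
      unsigned long long Bits = (0x3ffULL - NegExp) << 52;
      double D;
      std::memcpy(&D, &Bits, sizeof D);             // bit-cast into the exponent field
      return D;
    }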
+ DumpAnnot.setAnnotationRange( + SourceRange(Tok.getLocation(), Identifier.getLocation())); + DumpAnnot.setAnnotationValue(DumpII); + PP.DiscardUntilEndOfDirective(); + PP.EnterToken(DumpAnnot); + } else { + PP.Diag(Identifier, diag::warn_pragma_debug_missing_argument) + << II->getName(); + } } else if (II->isStr("llvm_fatal_error")) { llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error"); } else if (II->isStr("llvm_unreachable")) { @@ -887,7 +903,8 @@ struct PragmaDebugHandler : public PragmaHandler { if (MacroII) PP.dumpMacroInfo(MacroII); else - PP.Diag(MacroName, diag::warn_pragma_diagnostic_invalid); + PP.Diag(MacroName, diag::warn_pragma_debug_missing_argument) + << II->getName(); } else if (II->isStr("overflow_stack")) { DebugOverflowStack(); } else if (II->isStr("handle_crash")) { diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp index e69bb27..c64b97d 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp @@ -3326,6 +3326,15 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, case tok::kw___bool: isInvalid = DS.SetTypeAltiVecBool(true, Loc, PrevSpec, DiagID, Policy); break; + case tok::kw_pipe: + if (!getLangOpts().OpenCL || (getLangOpts().OpenCLVersion < 200)) { + // OpenCL 2.0 defined this keyword. OpenCL 1.2 and earlier should + // support the "pipe" word as identifier. + Tok.getIdentifierInfo()->revertTokenIDToIdentifier(); + goto DoneWithDeclSpec; + } + isInvalid = DS.SetTypePipe(true, Loc, PrevSpec, DiagID, Policy); + break; case tok::kw___unknown_anytype: isInvalid = DS.SetTypeSpecType(TST_unknown_anytype, Loc, PrevSpec, DiagID, Policy); @@ -4401,6 +4410,9 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { switch (Tok.getKind()) { default: return false; + case tok::kw_pipe: + return getLangOpts().OpenCL && (getLangOpts().OpenCLVersion >= 200); + case tok::identifier: // foo::bar // Unfortunate hack to support "Class.factoryMethod" notation. if (getLangOpts().ObjC1 && NextToken().is(tok::period)) @@ -4847,6 +4859,9 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, if (Kind == tok::star || Kind == tok::caret) return true; + if ((Kind == tok::kw_pipe) && Lang.OpenCL && (Lang.OpenCLVersion >= 200)) + return true; + if (!Lang.CPlusPlus) return false; @@ -4865,6 +4880,17 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, return false; } +// Indicates whether the given declarator is a pipe declarator. +static bool isPipeDeclerator(const Declarator &D) { + const unsigned NumTypes = D.getNumTypeObjects(); + + for (unsigned Idx = 0; Idx != NumTypes; ++Idx) + if (DeclaratorChunk::Pipe == D.getTypeObject(Idx).Kind) + return true; + + return false; +} + /// ParseDeclaratorInternal - Parse a C or C++ declarator. The direct-declarator /// is parsed by the function passed to it. Pass null, and the direct-declarator /// isn't parsed at all, making this function effectively parse the C++ @@ -4941,6 +4967,15 @@ void Parser::ParseDeclaratorInternal(Declarator &D, } tok::TokenKind Kind = Tok.getKind(); + + if (D.getDeclSpec().isTypeSpecPipe() && !isPipeDeclerator(D)) { + DeclSpec &DS = D.getMutableDeclSpec(); + + D.AddTypeInfo( + DeclaratorChunk::getPipe(DS.getTypeQualifiers(), DS.getPipeLoc()), + DS.getAttributes(), SourceLocation()); + } + // Not a pointer, C++ reference, or block. 
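The user-side view of the handler assembled above (sketch; the pragma takes one identifier, and a missing one produces the new warn_pragma_debug_missing_argument):

    int interesting_decl;
    #pragma clang __debug dump interesting_decl  // dumps what name lookup finds
    #pragma clang __debug dump                   // missing identifier: warned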
if (!isPtrOperatorToken(Kind, getLangOpts(), D.getContext())) { if (DirectDeclParser) @@ -6092,6 +6127,7 @@ void Parser::ParseMisplacedBracketDeclarator(Declarator &D) { case DeclaratorChunk::Reference: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: NeedParens = true; break; case DeclaratorChunk::Array: diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp index a4de975..3f22ad4 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp @@ -3363,7 +3363,8 @@ Parser::tryParseExceptionSpecification(bool Delayed, ConsumeAndStoreUntil(tok::r_paren, *ExceptionSpecTokens, /*StopAtSemi=*/true, /*ConsumeFinalToken=*/true); - SpecificationRange.setEnd(Tok.getLocation()); + SpecificationRange.setEnd(ExceptionSpecTokens->back().getLocation()); + return EST_Unparsed; } diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp index 078f4c3..a08db54 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp @@ -165,8 +165,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirective() { /// 'distribute' /// annot_pragma_openmp_end /// -StmtResult -Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) { +StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( + AllowedContsructsKind Allowed) { assert(Tok.is(tok::annot_pragma_openmp) && "Not an OpenMP directive!"); ParenBraceBracketBalancer BalancerRAIIObj(*this); SmallVector<Expr *, 5> Identifiers; @@ -186,6 +186,10 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) { switch (DKind) { case OMPD_threadprivate: + if (Allowed != ACK_Any) { + Diag(Tok, diag::err_omp_immediate_directive) + << getOpenMPDirectiveName(DKind) << 0; + } ConsumeToken(); if (!ParseOpenMPSimpleVarList(OMPD_threadprivate, Identifiers, false)) { // The last seen token is annot_pragma_openmp_end - need to check for @@ -213,7 +217,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) { case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_cancel: - if (!StandAloneAllowed) { + if (Allowed == ACK_StatementsOpenMPNonStandalone) { Diag(Tok, diag::err_omp_immediate_directive) << getOpenMPDirectiveName(DKind) << 0; } @@ -299,7 +303,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) { // If the depend clause is specified, the ordered construct is a stand-alone // directive. 
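How the keyword behaves after the pipe parsing changes above, in illustrative OpenCL C: under -cl-std=CL2.0 it is a type specifier and declarator chunk; under CL1.2 and earlier the token is reverted to a plain identifier.

    // -cl-std=CL2.0: 'pipe' forms a pipe type (packet type must be a value type).
    kernel void consume(read_only pipe int p);
    // -cl-std=CL1.2 and earlier: the same token is an ordinary identifier again.
    int pipe;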
if (DKind == OMPD_ordered && FirstClauses[OMPC_depend].getInt()) { - if (!StandAloneAllowed) { + if (Allowed == ACK_StatementsOpenMPNonStandalone) { Diag(Loc, diag::err_omp_immediate_directive) << getOpenMPDirectiveName(DKind) << 1 << getOpenMPClauseName(OMPC_depend); diff --git a/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp b/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp index 4430eb8..bc70942 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp @@ -377,6 +377,14 @@ void Parser::HandlePragmaAlign() { Actions.ActOnPragmaOptionsAlign(Kind, PragmaLoc); } +void Parser::HandlePragmaDump() { + assert(Tok.is(tok::annot_pragma_dump)); + IdentifierInfo *II = + reinterpret_cast<IdentifierInfo *>(Tok.getAnnotationValue()); + Actions.ActOnPragmaDump(getCurScope(), Tok.getLocation(), II); + ConsumeToken(); +} + void Parser::HandlePragmaWeak() { assert(Tok.is(tok::annot_pragma_weak)); SourceLocation PragmaLoc = ConsumeToken(); diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp index 717bcff..edf0dda 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp @@ -32,14 +32,18 @@ using namespace clang; /// \brief Parse a standalone statement (for instance, as the body of an 'if', /// 'while', or 'for'). -StmtResult Parser::ParseStatement(SourceLocation *TrailingElseLoc) { +StmtResult Parser::ParseStatement(SourceLocation *TrailingElseLoc, + bool AllowOpenMPStandalone) { StmtResult Res; // We may get back a null statement if we found a #pragma. Keep going until // we get an actual statement. do { StmtVector Stmts; - Res = ParseStatementOrDeclaration(Stmts, true, TrailingElseLoc); + Res = ParseStatementOrDeclaration( + Stmts, AllowOpenMPStandalone ? 
ACK_StatementsOpenMPAnyExecutable + : ACK_StatementsOpenMPNonStandalone, + TrailingElseLoc); } while (!Res.isInvalid() && !Res.get()); return Res; @@ -95,7 +99,8 @@ StmtResult Parser::ParseStatement(SourceLocation *TrailingElseLoc) { /// [OBC] '@' 'throw' ';' /// StmtResult -Parser::ParseStatementOrDeclaration(StmtVector &Stmts, bool OnlyStatement, +Parser::ParseStatementOrDeclaration(StmtVector &Stmts, + AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc) { ParenBraceBracketBalancer BalancerRAIIObj(*this); @@ -103,8 +108,8 @@ Parser::ParseStatementOrDeclaration(StmtVector &Stmts, bool OnlyStatement, ParsedAttributesWithRange Attrs(AttrFactory); MaybeParseCXX11Attributes(Attrs, nullptr, /*MightBeObjCMessageSend*/ true); - StmtResult Res = ParseStatementOrDeclarationAfterAttributes(Stmts, - OnlyStatement, TrailingElseLoc, Attrs); + StmtResult Res = ParseStatementOrDeclarationAfterAttributes( + Stmts, Allowed, TrailingElseLoc, Attrs); assert((Attrs.empty() || Res.isInvalid() || Res.isUsable()) && "attributes on empty statement"); @@ -146,7 +151,7 @@ private: StmtResult Parser::ParseStatementOrDeclarationAfterAttributes(StmtVector &Stmts, - bool OnlyStatement, SourceLocation *TrailingElseLoc, + AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc, ParsedAttributesWithRange &Attrs) { const char *SemiError = nullptr; StmtResult Res; @@ -202,7 +207,8 @@ Retry: } default: { - if ((getLangOpts().CPlusPlus || !OnlyStatement) && isDeclarationStatement()) { + if ((getLangOpts().CPlusPlus || Allowed == ACK_Any) && + isDeclarationStatement()) { SourceLocation DeclStart = Tok.getLocation(), DeclEnd; DeclGroupPtrTy Decl = ParseDeclaration(Declarator::BlockContext, DeclEnd, Attrs); @@ -346,7 +352,7 @@ Retry: case tok::annot_pragma_openmp: ProhibitAttributes(Attrs); - return ParseOpenMPDeclarativeOrExecutableDirective(!OnlyStatement); + return ParseOpenMPDeclarativeOrExecutableDirective(Allowed); case tok::annot_pragma_ms_pointers_to_members: ProhibitAttributes(Attrs); @@ -365,7 +371,11 @@ Retry: case tok::annot_pragma_loop_hint: ProhibitAttributes(Attrs); - return ParsePragmaLoopHint(Stmts, OnlyStatement, TrailingElseLoc, Attrs); + return ParsePragmaLoopHint(Stmts, Allowed, TrailingElseLoc, Attrs); + + case tok::annot_pragma_dump: + HandlePragmaDump(); + return StmtEmpty(); } // If we reached this code, the statement must end in a semicolon. @@ -583,7 +593,8 @@ StmtResult Parser::ParseLabeledStatement(ParsedAttributesWithRange &attrs) { // can't handle GNU attributes), so only call it in the one case where // GNU attributes are allowed. SubStmt = ParseStatementOrDeclarationAfterAttributes( - Stmts, /*OnlyStmts*/ true, nullptr, TempAttrs); + Stmts, /*Allowed=*/ACK_StatementsOpenMPNonStandalone, nullptr, + TempAttrs); if (!TempAttrs.empty() && !SubStmt.isInvalid()) SubStmt = Actions.ProcessStmtAttributes( SubStmt.get(), TempAttrs.getList(), TempAttrs.Range); @@ -722,7 +733,8 @@ StmtResult Parser::ParseCaseStatement(bool MissingCase, ExprResult Expr) { // continue parsing the sub-stmt. if (Case.isInvalid()) { if (TopLevelCase.isInvalid()) // No parsed case stmts. - return ParseStatement(); + return ParseStatement(/*TrailingElseLoc=*/nullptr, + /*AllowOpenMPStandalone=*/true); // Otherwise, just don't add it as a nested case. } else { // If this is the first case statement we parsed, it becomes TopLevelCase. 
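What the AllowedContsructsKind threading above distinguishes, in user code: stand-alone directives remain invalid as the immediate substatement of an if or a loop, but per the case/default changes below they are now accepted directly after a label (a sketch; the first pragma triggers err_omp_immediate_directive):

    void sync(bool b, int phase) {
      if (b)
    #pragma omp barrier   // rejected: stand-alone directive as immediate substatement
        ;
      switch (phase) {
      case 0:
    #pragma omp barrier   // now parsed: label bodies allow stand-alone directives
        break;
      default:
        break;
      }
    }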
@@ -742,7 +754,8 @@ StmtResult Parser::ParseCaseStatement(bool MissingCase, ExprResult Expr) { StmtResult SubStmt; if (Tok.isNot(tok::r_brace)) { - SubStmt = ParseStatement(); + SubStmt = ParseStatement(/*TrailingElseLoc=*/nullptr, + /*AllowOpenMPStandalone=*/true); } else { // Nicely diagnose the common error "switch (X) { case 4: }", which is // not valid. If ColonLoc doesn't point to a valid text location, there was @@ -794,7 +807,8 @@ StmtResult Parser::ParseDefaultStatement() { StmtResult SubStmt; if (Tok.isNot(tok::r_brace)) { - SubStmt = ParseStatement(); + SubStmt = ParseStatement(/*TrailingElseLoc=*/nullptr, + /*AllowOpenMPStandalone=*/true); } else { // Diagnose the common error "switch (X) {... default: }", which is // not valid. @@ -893,6 +907,9 @@ void Parser::ParseCompoundStatementLeadingPragmas() { case tok::annot_pragma_ms_vtordisp: HandlePragmaMSVtorDisp(); break; + case tok::annot_pragma_dump: + HandlePragmaDump(); + break; default: checkForPragmas = false; break; @@ -965,7 +982,7 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) { StmtResult R; if (Tok.isNot(tok::kw___extension__)) { - R = ParseStatementOrDeclaration(Stmts, false); + R = ParseStatementOrDeclaration(Stmts, ACK_Any); } else { // __extension__ can start declarations and it can also be a unary // operator for expressions. Consume multiple __extension__ markers here @@ -1861,7 +1878,8 @@ StmtResult Parser::ParseReturnStatement() { return Actions.ActOnReturnStmt(ReturnLoc, R.get(), getCurScope()); } -StmtResult Parser::ParsePragmaLoopHint(StmtVector &Stmts, bool OnlyStatement, +StmtResult Parser::ParsePragmaLoopHint(StmtVector &Stmts, + AllowedContsructsKind Allowed, SourceLocation *TrailingElseLoc, ParsedAttributesWithRange &Attrs) { // Create temporary attribute list. @@ -1884,7 +1902,7 @@ StmtResult Parser::ParsePragmaLoopHint(StmtVector &Stmts, bool OnlyStatement, MaybeParseCXX11Attributes(Attrs); StmtResult S = ParseStatementOrDeclarationAfterAttributes( - Stmts, OnlyStatement, TrailingElseLoc, Attrs); + Stmts, Allowed, TrailingElseLoc, Attrs); Attrs.takeAllFrom(TempAttrs); return S; @@ -2182,7 +2200,7 @@ void Parser::ParseMicrosoftIfExistsStatement(StmtVector &Stmts) { // Condition is true, parse the statements. while (Tok.isNot(tok::r_brace)) { - StmtResult R = ParseStatementOrDeclaration(Stmts, false); + StmtResult R = ParseStatementOrDeclaration(Stmts, ACK_Any); if (R.isUsable()) Stmts.push_back(R.get()); } diff --git a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp index b3eeb9d..ccefb3d 100644 --- a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp @@ -668,6 +668,9 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs, case tok::annot_pragma_ms_pragma: HandlePragmaMSPragma(); return DeclGroupPtrTy(); + case tok::annot_pragma_dump: + HandlePragmaDump(); + return DeclGroupPtrTy(); case tok::semi: // Either a C++11 empty-declaration or attribute-declaration. 
SingleDecl = Actions.ActOnEmptyDeclaration(getCurScope(), diff --git a/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp b/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp index d664d87..6f6c4ca 100644 --- a/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp @@ -270,6 +270,7 @@ bool Declarator::isDeclarationOfFunction() const { case DeclaratorChunk::Array: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: return false; } llvm_unreachable("Invalid type chunk"); @@ -713,6 +714,22 @@ bool DeclSpec::SetTypeAltiVecVector(bool isAltiVecVector, SourceLocation Loc, return false; } +bool DeclSpec::SetTypePipe(bool isPipe, SourceLocation Loc, + const char *&PrevSpec, unsigned &DiagID, + const PrintingPolicy &Policy) { + + if (TypeSpecType != TST_unspecified) { + PrevSpec = DeclSpec::getSpecifierName((TST)TypeSpecType, Policy); + DiagID = diag::err_invalid_decl_spec_combination; + return true; + } + + if (isPipe) { + TypeSpecPipe = TSP_pipe; + } + return false; +} + bool DeclSpec::SetTypeAltiVecPixel(bool isAltiVecPixel, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID, const PrintingPolicy &Policy) { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp index 07b0589..ad1d7da 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp @@ -2105,6 +2105,7 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle, && (SrcExpr.get()->getType()->isIntegerType() || SrcExpr.get()->getType()->isFloatingType())) { Kind = CK_VectorSplat; + SrcExpr = Self.prepareVectorSplat(DestType, SrcExpr.get()); return; } @@ -2339,6 +2340,7 @@ void CastOperation::CheckCStyleCast() { if (DestVecTy->getVectorKind() == VectorType::AltiVecVector && (SrcType->isIntegerType() || SrcType->isFloatingType())) { Kind = CK_VectorSplat; + SrcExpr = Self.prepareVectorSplat(DestType, SrcExpr.get()); } else if (Self.CheckVectorCast(OpRange, DestType, SrcType, Kind)) { SrcExpr = ExprError(); } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp index cbdcb5e..6c2834b 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp @@ -6243,7 +6243,8 @@ static IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) { IntRange OutputTypeRange = IntRange::forValueOfType(C, GetExprType(CE)); - bool isIntegerCast = (CE->getCastKind() == CK_IntegralCast); + bool isIntegerCast = CE->getCastKind() == CK_IntegralCast || + CE->getCastKind() == CK_BooleanToSignedIntegral; // Assume that non-integer casts can span the full range of the type. if (!isIntegerCast) @@ -7047,6 +7048,10 @@ static void DiagnoseNullConversion(Sema &S, Expr *E, QualType T, E->getExprLoc())) return; + // Don't warn on functions which have return type nullptr_t. + if (isa<CallExpr>(E)) + return; + // Check for NULL (GNUNull) or nullptr (CXX11_nullptr). const Expr::NullPointerConstantKind NullKind = E->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull); @@ -7062,8 +7067,12 @@ static void DiagnoseNullConversion(Sema &S, Expr *E, QualType T, // __null is usually wrapped in a macro. Go up a macro if that is the case. 
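One plausible reading of the name check that follows (hedged; macro-location bookkeeping is subtle): the diagnostic location only hops out of a macro expansion when the immediate macro is literally spelled NULL, so a warning about a user macro that merely expands to __null stays attributed to that macro instead of drifting to an unrelated expansion site.

    // Illustrative only; exact attribution depends on the expansion chain.
    #define MY_SENTINEL NULL   // user macro wrapping NULL/__null
    int a = NULL;              // -Wconversion-null points at this NULL use
    int b = MY_SENTINEL;       // location is no longer unconditionally unwrapped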
if (NullKind == Expr::NPCK_GNUNull) { - if (Loc.isMacroID()) - Loc = S.SourceMgr.getImmediateExpansionRange(Loc).first; + if (Loc.isMacroID()) { + StringRef MacroName = + Lexer::getImmediateMacroName(Loc, S.SourceMgr, S.getLangOpts()); + if (MacroName == "NULL") + Loc = S.SourceMgr.getImmediateExpansionRange(Loc).first; + } } // Only warn if the null and context location are in the same macro expansion. @@ -7845,6 +7854,10 @@ void Sema::CheckBoolLikeConversion(Expr *E, SourceLocation CC) { void Sema::CheckForIntOverflow (Expr *E) { if (isa<BinaryOperator>(E->IgnoreParenCasts())) E->IgnoreParenCasts()->EvaluateForOverflow(Context); + else if (auto InitList = dyn_cast<InitListExpr>(E)) + for (Expr *E : InitList->inits()) + if (isa<BinaryOperator>(E->IgnoreParenCasts())) + E->IgnoreParenCasts()->EvaluateForOverflow(Context); } namespace { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp index f27fb2b1..f95d106 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp @@ -3962,9 +3962,6 @@ static bool CheckAnonMemberRedeclaration(Sema &SemaRef, Sema::ForRedeclaration); if (!SemaRef.LookupName(R, S)) return false; - if (R.getAsSingle<TagDecl>()) - return false; - // Pick a representative declaration. NamedDecl *PrevDecl = R.getRepresentativeDecl()->getUnderlyingDecl(); assert(PrevDecl && "Expected a non-null Decl"); @@ -4675,11 +4672,13 @@ bool Sema::DiagnoseClassNameShadow(DeclContext *DC, DeclarationNameInfo NameInfo) { DeclarationName Name = NameInfo.getName(); - if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC)) - if (Record->getIdentifier() && Record->getDeclName() == Name) { - Diag(NameInfo.getLoc(), diag::err_member_name_of_class) << Name; - return true; - } + CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC); + while (Record && Record->isAnonymousStructOrUnion()) + Record = dyn_cast<CXXRecordDecl>(Record->getParent()); + if (Record && Record->getIdentifier() && Record->getDeclName() == Name) { + Diag(NameInfo.getLoc(), diag::err_member_name_of_class) << Name; + return true; + } return false; } @@ -8257,6 +8256,23 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, for (auto Param : NewFD->params()) checkIsValidOpenCLKernelParameter(*this, D, Param, ValidTypes); } + for (FunctionDecl::param_iterator PI = NewFD->param_begin(), + PE = NewFD->param_end(); PI != PE; ++PI) { + ParmVarDecl *Param = *PI; + QualType PT = Param->getType(); + + // OpenCL 2.0 pipe restrictions forbids pipe packet types to be non-value + // types. + if (getLangOpts().OpenCLVersion >= 200) { + if(const PipeType *PipeTy = PT->getAs<PipeType>()) { + QualType ElemTy = PipeTy->getElementType(); + if (ElemTy->isReferenceType() || ElemTy->isPointerType()) { + Diag(Param->getTypeSpecStartLoc(), diag::err_reference_pipe_type ); + D.setInvalidType(); + } + } + } + } MarkUnusedFileScopedDecl(NewFD); @@ -11799,6 +11815,28 @@ static bool isAcceptableTagRedeclContext(Sema &S, DeclContext *OldDC, return false; } +/// Find the DeclContext in which a tag is implicitly declared if we see an +/// elaborated type specifier in the specified context, and lookup finds +/// nothing. 
+static DeclContext *getTagInjectionContext(DeclContext *DC) { + while (!DC->isFileContext() && !DC->isFunctionOrMethod()) + DC = DC->getParent(); + return DC; +} + +/// Find the Scope in which a tag is implicitly declared if we see an +/// elaborated type specifier in the specified context, and lookup finds +/// nothing. +static Scope *getTagInjectionScope(Scope *S, const LangOptions &LangOpts) { + while (S->isClassScope() || + (LangOpts.CPlusPlus && + S->isFunctionPrototypeScope()) || + ((S->getFlags() & Scope::DeclScope) == 0) || + (S->getEntity() && S->getEntity()->isTransparentContext())) + S = S->getParent(); + return S; +} + /// \brief This is invoked when we see 'struct foo' or 'struct {'. In the /// former case, Name will be non-null. In the later case, Name will be null. /// TagSpec indicates what kind of tag this is. TUK indicates whether this is a @@ -12115,16 +12153,10 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // Find the context where we'll be declaring the tag. // FIXME: We would like to maintain the current DeclContext as the // lexical context, - while (!SearchDC->isFileContext() && !SearchDC->isFunctionOrMethod()) - SearchDC = SearchDC->getParent(); + SearchDC = getTagInjectionContext(SearchDC); // Find the scope where we'll be declaring the tag. - while (S->isClassScope() || - (getLangOpts().CPlusPlus && - S->isFunctionPrototypeScope()) || - ((S->getFlags() & Scope::DeclScope) == 0) || - (S->getEntity() && S->getEntity()->isTransparentContext())) - S = S->getParent(); + S = getTagInjectionScope(S, getLangOpts()); } else { assert(TUK == TUK_Friend); // C++ [namespace.memdef]p3: @@ -12284,7 +12316,8 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, } else if (TUK == TUK_Reference && (PrevTagDecl->getFriendObjectKind() == Decl::FOK_Undeclared || - getOwningModule(PrevDecl) != + PP.getModuleContainingLocation( + PrevDecl->getLocation()) != PP.getModuleContainingLocation(KWLoc)) && SS.isEmpty()) { // This declaration is a reference to an existing entity, but @@ -12294,14 +12327,12 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // the declaration would have meant the same thing if no prior // declaration were found, that is, if it was found in the same // scope where we would have injected a declaration. - DeclContext *InjectedDC = CurContext; - while (!InjectedDC->isFileContext() && - !InjectedDC->isFunctionOrMethod()) - InjectedDC = InjectedDC->getParent(); - if (!InjectedDC->getRedeclContext()->Equals( - PrevDecl->getDeclContext()->getRedeclContext())) + if (!getTagInjectionContext(CurContext)->getRedeclContext() + ->Equals(PrevDecl->getDeclContext()->getRedeclContext())) return PrevTagDecl; - // This is in the injected scope, create a new declaration. + // This is in the injected scope, create a new declaration in + // that scope. 
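The scenario the two helpers factor out, in user code: when an elaborated-type-specifier's lookup finds nothing, the tag is declared in the nearest enclosing scope that can actually hold it, skipping class and (in C++) function-prototype scopes.

    namespace N {
    void f(struct S *p); // 'S' is injected into namespace N,
                         // not into f's prototype scope
    struct S;            // redeclares that same N::S
    }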
+ S = getTagInjectionScope(S, getLangOpts()); } else { return PrevTagDecl; } @@ -12603,7 +12634,7 @@ CreateNewDecl: << Name; Invalid = true; } - } else { + } else if (!PrevDecl) { Diag(Loc, diag::warn_decl_in_param_list) << Context.getTagDeclType(New); } DeclsInPrototypeScope.push_back(New); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp index 5a0f0f8..f94c822 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp @@ -348,6 +348,25 @@ static void handleSimpleAttribute(Sema &S, Decl *D, Attr.getAttributeSpellingListIndex())); } +template <typename AttrType> +static void handleSimpleAttributeWithExclusions(Sema &S, Decl *D, + const AttributeList &Attr) { + handleSimpleAttribute<AttrType>(S, D, Attr); +} + +/// \brief Applies the given attribute to the Decl so long as the Decl doesn't +/// already have one of the given incompatible attributes. +template <typename AttrType, typename IncompatibleAttrType, + typename... IncompatibleAttrTypes> +static void handleSimpleAttributeWithExclusions(Sema &S, Decl *D, + const AttributeList &Attr) { + if (checkAttrMutualExclusion<IncompatibleAttrType>(S, D, Attr.getRange(), + Attr.getName())) + return; + handleSimpleAttributeWithExclusions<AttrType, IncompatibleAttrTypes...>(S, D, + Attr); +} + /// \brief Check if the passed-in expression is of type int or bool. static bool isIntOrBool(Expr *Exp) { QualType QT = Exp->getType(); @@ -3588,6 +3607,12 @@ static void handleOptimizeNoneAttr(Sema &S, Decl *D, } static void handleGlobalAttr(Sema &S, Decl *D, const AttributeList &Attr) { + if (checkAttrMutualExclusion<CUDADeviceAttr>(S, D, Attr.getRange(), + Attr.getName()) || + checkAttrMutualExclusion<CUDAHostAttr>(S, D, Attr.getRange(), + Attr.getName())) { + return; + } FunctionDecl *FD = cast<FunctionDecl>(D); if (!FD->getReturnType()->isVoidType()) { SourceRange RTRange = FD->getReturnTypeSourceRange(); @@ -4558,14 +4583,6 @@ static void handleInterruptAttr(Sema &S, Decl *D, const AttributeList &Attr) { handleARMInterruptAttr(S, D, Attr); } -static void handleMips16Attribute(Sema &S, Decl *D, const AttributeList &Attr) { - if (checkAttrMutualExclusion<MipsInterruptAttr>(S, D, Attr.getRange(), - Attr.getName())) - return; - - handleSimpleAttribute<Mips16Attr>(S, D, Attr); -} - static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, const AttributeList &Attr) { uint32_t NumRegs; @@ -4955,7 +4972,8 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, handleDLLAttr(S, D, Attr); break; case AttributeList::AT_Mips16: - handleMips16Attribute(S, D, Attr); + handleSimpleAttributeWithExclusions<Mips16Attr, MipsInterruptAttr>(S, D, + Attr); break; case AttributeList::AT_NoMips16: handleSimpleAttribute<NoMips16Attr>(S, D, Attr); @@ -5006,7 +5024,8 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, handleCommonAttr(S, D, Attr); break; case AttributeList::AT_CUDAConstant: - handleSimpleAttribute<CUDAConstantAttr>(S, D, Attr); + handleSimpleAttributeWithExclusions<CUDAConstantAttr, CUDASharedAttr>(S, D, + Attr); break; case AttributeList::AT_PassObjectSize: handlePassObjectSizeAttr(S, D, Attr); @@ -5051,10 +5070,12 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, handleGlobalAttr(S, D, Attr); break; case AttributeList::AT_CUDADevice: - handleSimpleAttribute<CUDADeviceAttr>(S, D, Attr); + handleSimpleAttributeWithExclusions<CUDADeviceAttr, CUDAGlobalAttr>(S, D, + Attr); break; case 
AttributeList::AT_CUDAHost: - handleSimpleAttribute<CUDAHostAttr>(S, D, Attr); + handleSimpleAttributeWithExclusions<CUDAHostAttr, CUDAGlobalAttr>(S, D, + Attr); break; case AttributeList::AT_GNUInline: handleGNUInlineAttr(S, D, Attr); @@ -5114,7 +5135,8 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, handleSimpleAttribute<NoThrowAttr>(S, D, Attr); break; case AttributeList::AT_CUDAShared: - handleSimpleAttribute<CUDASharedAttr>(S, D, Attr); + handleSimpleAttributeWithExclusions<CUDASharedAttr, CUDAConstantAttr>(S, D, + Attr); break; case AttributeList::AT_VecReturn: handleVecReturnAttr(S, D, Attr); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp index 02091a7..11f2329 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp @@ -7000,6 +7000,7 @@ void Sema::CheckConversionDeclarator(Declarator &D, QualType &R, case DeclaratorChunk::BlockPointer: case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: extendLeft(Before, Chunk.getSourceRange()); break; @@ -7796,6 +7797,10 @@ bool Sema::CheckUsingShadowDecl(UsingDecl *Using, NamedDecl *Orig, if (UsingShadowDecl *Shadow = dyn_cast<UsingShadowDecl>(*I)) PrevShadow = Shadow; FoundEquivalentDecl = true; + } else if (isEquivalentInternalLinkageDeclaration(D, Target)) { + // We don't conflict with an existing using shadow decl of an equivalent + // declaration, but we're not a redeclaration of it. + FoundEquivalentDecl = true; } if (isVisible(D)) diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp index 5d0c605..3e89af6 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp @@ -3084,6 +3084,8 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) { Kind = CharacterLiteral::UTF16; else if (Literal.isUTF32()) Kind = CharacterLiteral::UTF32; + else if (Literal.isUTF8()) + Kind = CharacterLiteral::UTF8; Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty, Tok.getLocation()); @@ -4313,10 +4315,16 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, if (Result.isInvalid()) return ExprError(); - Expr *Arg = Result.getAs<Expr>(); - CheckCompletedExpr(Arg, Param->getOuterLocStart()); - // Build the default argument expression. - return CXXDefaultArgExpr::Create(Context, CallLoc, Param, Arg); + Result = ActOnFinishFullExpr(Result.getAs<Expr>(), + Param->getOuterLocStart()); + if (Result.isInvalid()) + return ExprError(); + + // Remember the instantiated default argument. 
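As the comment above says, the freshly instantiated default argument is cached back on the ParmVarDecl in the lines that follow, and the ASTMutationListener is notified; the UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT serialization added later in this diff records exactly that update. Hypothetical user code that reaches this path:

    template <typename T> void f(T x = T());  // default arg is only a pattern here
    void g() { f<int>(); }                    // instantiated at the call, then
                                              // cached on the parameter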
+ Param->setDefaultArg(Result.getAs<Expr>()); + if (ASTMutationListener *L = getASTMutationListener()) { + L->DefaultArgumentInstantiated(Param); + } } // If the default expression creates temporaries, we need to @@ -4929,7 +4937,9 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc, OverloadExpr *ovl = find.Expression; if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(ovl)) return BuildOverloadedCallExpr(S, Fn, ULE, LParenLoc, ArgExprs, - RParenLoc, ExecConfig); + RParenLoc, ExecConfig, + /*AllowTypoCorrection=*/true, + find.IsAddressOfOperand); return BuildCallToMemberFunction(S, Fn, LParenLoc, ArgExprs, RParenLoc); } } @@ -4943,10 +4953,14 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc, Expr *NakedFn = Fn->IgnoreParens(); + bool CallingNDeclIndirectly = false; NamedDecl *NDecl = nullptr; - if (UnaryOperator *UnOp = dyn_cast<UnaryOperator>(NakedFn)) - if (UnOp->getOpcode() == UO_AddrOf) + if (UnaryOperator *UnOp = dyn_cast<UnaryOperator>(NakedFn)) { + if (UnOp->getOpcode() == UO_AddrOf) { + CallingNDeclIndirectly = true; NakedFn = UnOp->getSubExpr()->IgnoreParens(); + } + } if (isa<DeclRefExpr>(NakedFn)) { NDecl = cast<DeclRefExpr>(NakedFn)->getDecl(); @@ -4968,6 +4982,11 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc, NDecl = cast<MemberExpr>(NakedFn)->getMemberDecl(); if (FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(NDecl)) { + if (CallingNDeclIndirectly && + !checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, + Fn->getLocStart())) + return ExprError(); + if (FD->hasAttr<EnableIfAttr>()) { if (const EnableIfAttr *Attr = CheckEnableIf(FD, ArgExprs, true)) { Diag(Fn->getLocStart(), @@ -5583,6 +5602,39 @@ bool Sema::CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty, return false; } +ExprResult Sema::prepareVectorSplat(QualType VectorTy, Expr *SplattedExpr) { + QualType DestElemTy = VectorTy->castAs<VectorType>()->getElementType(); + + if (DestElemTy == SplattedExpr->getType()) + return SplattedExpr; + + assert(DestElemTy->isFloatingType() || + DestElemTy->isIntegralOrEnumerationType()); + + CastKind CK; + if (VectorTy->isExtVectorType() && SplattedExpr->getType()->isBooleanType()) { + // OpenCL requires that we convert `true` boolean expressions to -1, but + // only when splatting vectors. + if (DestElemTy->isFloatingType()) { + // To avoid having to have a CK_BooleanToSignedFloating cast kind, we cast + // in two steps: boolean to signed integral, then to floating. 
+ ExprResult CastExprRes = ImpCastExprToType(SplattedExpr, Context.IntTy, + CK_BooleanToSignedIntegral); + SplattedExpr = CastExprRes.get(); + CK = CK_IntegralToFloating; + } else { + CK = CK_BooleanToSignedIntegral; + } + } else { + ExprResult CastExprRes = SplattedExpr; + CK = PrepareScalarCast(CastExprRes, DestElemTy); + if (CastExprRes.isInvalid()) + return ExprError(); + SplattedExpr = CastExprRes.get(); + } + return ImpCastExprToType(SplattedExpr, DestElemTy, CK); +} + ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy, Expr *CastExpr, CastKind &Kind) { assert(DestTy->isExtVectorType() && "Not an extended vector type!"); @@ -5613,15 +5665,8 @@ ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy, diag::err_invalid_conversion_between_vector_and_scalar) << DestTy << SrcTy << R; - QualType DestElemTy = DestTy->getAs<ExtVectorType>()->getElementType(); - ExprResult CastExprRes = CastExpr; - CastKind CK = PrepareScalarCast(CastExprRes, DestElemTy); - if (CastExprRes.isInvalid()) - return ExprError(); - CastExpr = ImpCastExprToType(CastExprRes.get(), DestElemTy, CK).get(); - Kind = CK_VectorSplat; - return CastExpr; + return prepareVectorSplat(DestTy, CastExpr); } ExprResult @@ -6960,13 +7005,9 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, if (RHSType->isExtVectorType()) return Incompatible; if (RHSType->isArithmeticType()) { - // CK_VectorSplat does T -> vector T, so first cast to the - // element type. - QualType elType = cast<ExtVectorType>(LHSType)->getElementType(); - if (elType != RHSType && ConvertRHS) { - Kind = PrepareScalarCast(RHS, elType); - RHS = ImpCastExprToType(RHS.get(), elType, Kind); - } + // CK_VectorSplat does T -> vector T, so first cast to the element type. + if (ConvertRHS) + RHS = prepareVectorSplat(LHSType, RHS.get()); Kind = CK_VectorSplat; return Compatible; } @@ -8184,7 +8225,7 @@ static QualType checkOpenCLVectorShift(Sema &S, if (RHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); - const VectorType *LHSVecTy = LHSType->getAs<VectorType>(); + const VectorType *LHSVecTy = LHSType->castAs<VectorType>(); QualType LHSEleType = LHSVecTy->getElementType(); // Note that RHS might not be a vector. @@ -13121,6 +13162,7 @@ bool Sema::tryCaptureVariable( case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: + case Type::Pipe: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: QTy = cast<AdjustedType>(Ty)->getOriginalType(); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp index 2ad595f..38fbea1 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -3353,20 +3353,13 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; - case ICK_Vector_Splat: + case ICK_Vector_Splat: { // Vector splat from any arithmetic type to a vector. - // Cast to the element type. 
- { - QualType elType = ToType->getAs<ExtVectorType>()->getElementType(); - if (elType != From->getType()) { - ExprResult E = From; - From = ImpCastExprToType(From, elType, - PrepareScalarCast(E, elType)).get(); - } - From = ImpCastExprToType(From, ToType, CK_VectorSplat, - VK_RValue, /*BasePath=*/nullptr, CCK).get(); - } + Expr *Elem = prepareVectorSplat(ToType, From).get(); + From = ImpCastExprToType(Elem, ToType, CK_VectorSplat, VK_RValue, + /*BasePath=*/nullptr, CCK).get(); break; + } case ICK_Complex_Real: // Case 1. x -> _Complex y diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp index 57a08b9..1d86ca3 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp @@ -319,6 +319,7 @@ ExprResult Sema::BuildObjCNumericLiteral(SourceLocation AtLoc, Expr *Number) { // to use to determine the Objective-c literal kind. switch (Char->getKind()) { case CharacterLiteral::Ascii: + case CharacterLiteral::UTF8: NumberType = Context.CharTy; break; @@ -577,6 +578,7 @@ ExprResult Sema::BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) { // to use to determine the Objective-c literal kind. switch (Char->getKind()) { case CharacterLiteral::Ascii: + case CharacterLiteral::UTF8: ValueType = Context.CharTy; break; diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp index 481ae6c..45dc2e3 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp @@ -650,6 +650,13 @@ void LookupResult::print(raw_ostream &Out) { } } +LLVM_DUMP_METHOD void LookupResult::dump() { + llvm::errs() << "lookup results for " << getLookupName().getAsString() + << ":\n"; + for (NamedDecl *D : *this) + D->dump(); +} + /// \brief Lookup a builtin function, when name lookup would otherwise /// fail. 
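LookupResult::dump and Sema::ActOnPragmaDump above are debugging aids: an any-kind lookup with diagnostics suppressed, whose results are dumped to stderr. The pragma spelling is not shown in this diff; assuming it is along the lines of #pragma clang __debug dump, usage would look like:

    int foo;
    #pragma clang __debug dump foo   // prints "lookup results for foo:" and then
                                     // dumps the located VarDecl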
static bool LookupBuiltin(Sema &S, LookupResult &R) { @@ -2616,6 +2623,9 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) { case Type::Atomic: T = cast<AtomicType>(T)->getValueType().getTypePtr(); continue; + case Type::Pipe: + T = cast<PipeType>(T)->getElementType().getTypePtr(); + continue; } if (Queue.empty()) @@ -4988,3 +4998,12 @@ const Sema::TypoExprState &Sema::getTypoExprState(TypoExpr *TE) const { void Sema::clearDelayedTypo(TypoExpr *TE) { DelayedTypos.erase(TE); } + +void Sema::ActOnPragmaDump(Scope *S, SourceLocation IILoc, IdentifierInfo *II) { + DeclarationNameInfo Name(II, IILoc); + LookupResult R(*this, Name, LookupAnyName, Sema::NotForRedeclaration); + R.suppressDiagnostics(); + R.setHideTags(false); + LookupName(R, S); + R.dump(); +} diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp index e0c10e4..663da0c 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp @@ -258,6 +258,7 @@ static const Expr *IgnoreNarrowingConversion(const Expr *Converted) { case CK_IntegralCast: case CK_IntegralToBoolean: case CK_IntegralToFloating: + case CK_BooleanToSignedIntegral: case CK_FloatingToIntegral: case CK_FloatingToBoolean: case CK_FloatingCast: @@ -9643,6 +9644,13 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand, case ovl_fail_enable_if: return DiagnoseFailedEnableIfAttr(S, Cand); + + case ovl_fail_addr_not_available: { + bool Available = checkAddressOfCandidateIsAvailable(S, Cand->Function); + (void)Available; + assert(!Available); + break; + } } } @@ -11245,6 +11253,17 @@ static ExprResult FinishOverloadedCallExpr(Sema &SemaRef, Scope *S, Expr *Fn, return ExprError(); } +static void markUnaddressableCandidatesUnviable(Sema &S, + OverloadCandidateSet &CS) { + for (auto I = CS.begin(), E = CS.end(); I != E; ++I) { + if (I->Viable && + !S.checkAddressOfFunctionIsAvailable(I->Function, /*Complain=*/false)) { + I->Viable = false; + I->FailureKind = ovl_fail_addr_not_available; + } + } +} + /// BuildOverloadedCallExpr - Given the call expression that calls Fn /// (which eventually refers to the declaration Func) and the call /// arguments Args/NumArgs, attempt to resolve the function call down @@ -11257,7 +11276,8 @@ ExprResult Sema::BuildOverloadedCallExpr(Scope *S, Expr *Fn, MultiExprArg Args, SourceLocation RParenLoc, Expr *ExecConfig, - bool AllowTypoCorrection) { + bool AllowTypoCorrection, + bool CalleesAddressIsTaken) { OverloadCandidateSet CandidateSet(Fn->getExprLoc(), OverloadCandidateSet::CSK_Normal); ExprResult result; @@ -11266,6 +11286,11 @@ ExprResult Sema::BuildOverloadedCallExpr(Scope *S, Expr *Fn, &result)) return result; + // If the user handed us something like `(&Foo)(Bar)`, we need to ensure that + // functions that aren't addressable are considered unviable.
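A hypothetical example of the (&Foo)(Bar) situation handled here, assuming pass_object_size (also added in this import) as the attribute that makes a candidate unaddressable:

    void copy(char *dst __attribute__((pass_object_size(0))), const char *src);
    void copy(char *dst, long n);
    void test(char *d) {
      (&copy)(d, 0);   // the pass_object_size overload cannot have its address
    }                  // taken, so it is marked unviable rather than ambiguous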
+ if (CalleesAddressIsTaken) + markUnaddressableCandidatesUnviable(*this, CandidateSet); + OverloadCandidateSet::iterator Best; OverloadingResult OverloadResult = CandidateSet.BestViableFunction(*this, Fn->getLocStart(), Best); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp index 6cc8588..5715607 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp @@ -4184,6 +4184,10 @@ bool UnnamedLocalNoLinkageFinder::VisitAtomicType(const AtomicType* T) { return Visit(T->getValueType()); } +bool UnnamedLocalNoLinkageFinder::VisitPipeType(const PipeType* T) { + return false; +} + bool UnnamedLocalNoLinkageFinder::VisitTagDecl(const TagDecl *Tag) { if (Tag->getDeclContext()->isFunctionOrMethod()) { S.Diag(SR.getBegin(), @@ -5503,6 +5507,8 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg, Expr *E; if (T->isAnyCharacterType()) { + // This does not need to handle u8 character literals because those are + // of type char, and so can also be covered by an ASCII character literal. CharacterLiteral::CharacterKind Kind; if (T->isWideCharType()) Kind = CharacterLiteral::Wide; diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp index cd54920..71faafc 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -1652,6 +1652,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S, case Type::Auto: case Type::DependentTemplateSpecialization: case Type::PackExpansion: + case Type::Pipe: // No template argument deduction for these types return Sema::TDK_Success; } @@ -4964,6 +4965,7 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, case Type::ObjCObject: case Type::ObjCObjectPointer: case Type::UnresolvedUsing: + case Type::Pipe: #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp index 61052f0..cb67d71 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -750,6 +750,7 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) { case DeclaratorChunk::Pointer: case DeclaratorChunk::Reference: case DeclaratorChunk::Paren: + case DeclaratorChunk::Pipe: case DeclaratorChunk::BlockPointer: // These declarator chunks cannot contain any parameter packs. 
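The template-argument comment above leans on u8 character literals having type char. An illustrative C++1z snippet (hypothetical test input):

    template <char C> struct Tag {};
    Tag<u8'a'> t;   // the argument round-trips as a plain Ascii CharacterLiteral,
                    // since u8'a' is a char with value 0x61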
break; diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp index c70568c..f6ad132 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp @@ -335,6 +335,7 @@ static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator, case DeclaratorChunk::Array: case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: return result; // If we do find a function declarator, scan inwards from that, @@ -347,6 +348,7 @@ static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator, case DeclaratorChunk::Array: case DeclaratorChunk::Function: case DeclaratorChunk::Reference: + case DeclaratorChunk::Pipe: continue; case DeclaratorChunk::MemberPointer: @@ -427,6 +429,7 @@ static void distributeObjCPointerTypeAttr(TypeProcessingState &state, // Don't walk through these. case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: goto error; } } @@ -459,6 +462,7 @@ distributeObjCPointerTypeAttrFromDeclarator(TypeProcessingState &state, case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Paren: case DeclaratorChunk::Array: + case DeclaratorChunk::Pipe: continue; case DeclaratorChunk::Function: @@ -520,6 +524,7 @@ static void distributeFunctionTypeAttr(TypeProcessingState &state, case DeclaratorChunk::Array: case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: continue; } } @@ -1272,6 +1277,10 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { // value being declared, poison it as invalid so we don't get chains of // errors. declarator.setInvalidType(true); + } else if (S.getLangOpts().OpenCLVersion >= 200 && DS.isTypeSpecPipe()) { + S.Diag(DeclLoc, diag::err_missing_actual_pipe_type) + << DS.getSourceRange(); + declarator.setInvalidType(true); } else { S.Diag(DeclLoc, diag::ext_missing_type_specifier) << DS.getSourceRange(); @@ -1564,7 +1573,9 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { // Apply any type attributes from the decl spec. This may cause the // list of type attributes to be temporarily saved while the type // attributes are pushed around. - processTypeAttrs(state, Result, TAL_DeclSpec, DS.getAttributes().getList()); + // Pipe attributes will be handled later (in GetFullTypeForDeclarator). + if (!DS.isTypeSpecPipe()) + processTypeAttrs(state, Result, TAL_DeclSpec, DS.getAttributes().getList()); // Apply const/volatile/restrict qualifiers to T. if (unsigned TypeQuals = DS.getTypeQualifiers()) { @@ -1924,6 +1935,21 @@ QualType Sema::BuildReferenceType(QualType T, bool SpelledAsLValue, return Context.getRValueReferenceType(T); } +/// \brief Build a Pipe type. +/// +/// \param T The element type of the pipe being built. +/// +/// \param Loc Currently unused. +/// +/// \returns A suitable pipe type, if there are no errors. Otherwise, returns a +/// NULL type. +QualType Sema::BuildPipeType(QualType T, SourceLocation Loc) { + assert(!T->isObjCObjectType() && "Should build ObjCObjectPointerType"); + + // Build the pipe type. + return Context.getPipeType(T); +} + /// Check whether the specified array size makes the array type a VLA. If so, /// return true, if not, return the size of the array in SizeVal.
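Sema::BuildPipeType above is the one entry point for forming OpenCL 2.0 pipe types: a declaration like pipe int p; reaches it with T being int, while a bare pipe p; is rejected earlier through err_missing_actual_pipe_type. A compiler-side sketch (hypothetical caller; S is a Sema, KWLoc the 'pipe' keyword location):

    QualType Elem = S.Context.IntTy;
    QualType Pipe = S.BuildPipeType(Elem, KWLoc);   // yields 'pipe int'
    assert(!Pipe.isNull() && Pipe->isPipeType());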
static bool isArraySizeVLA(Sema &S, Expr *ArraySize, llvm::APSInt &SizeVal) { @@ -2393,6 +2419,7 @@ static void inferARCWriteback(TypeProcessingState &state, case DeclaratorChunk::Array: // suppress if written (id[])? case DeclaratorChunk::Function: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: return; } } @@ -2532,6 +2559,7 @@ static void diagnoseRedundantReturnTypeQualifiers(Sema &S, QualType RetTy, case DeclaratorChunk::Reference: case DeclaratorChunk::Array: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: // FIXME: We can't currently provide an accurate source location and a // fix-it hint for these. unsigned AtomicQual = RetTy->isAtomicType() ? DeclSpec::TQ_atomic : 0; @@ -3057,6 +3085,7 @@ static PointerDeclaratorKind classifyPointerDeclarator(Sema &S, switch (chunk.Kind) { case DeclaratorChunk::Array: case DeclaratorChunk::Function: + case DeclaratorChunk::Pipe: break; case DeclaratorChunk::BlockPointer: @@ -3305,6 +3334,8 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, case DeclaratorChunk::Array: DiagKind = 2; break; + case DeclaratorChunk::Pipe: + break; } S.Diag(DeclChunk.Loc, DiagId) << DiagKind; @@ -3370,6 +3401,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, switch (chunk.Kind) { case DeclaratorChunk::Array: case DeclaratorChunk::Function: + case DeclaratorChunk::Pipe: break; case DeclaratorChunk::BlockPointer: @@ -3689,6 +3721,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, break; case DeclaratorChunk::Function: case DeclaratorChunk::BlockPointer: + case DeclaratorChunk::Pipe: // These are invalid anyway, so just ignore. break; } @@ -4038,7 +4071,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, break; } - case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::MemberPointer: { // The scope spec must refer to a class, or be dependent. CXXScopeSpec &SS = DeclType.Mem.Scope(); QualType ClsType; @@ -4098,6 +4131,12 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, break; } + case DeclaratorChunk::Pipe: { + T = S.BuildPipeType(T, DeclType.Loc ); + break; + } + } + if (T.isNull()) { D.setInvalidType(true); T = Context.IntTy; @@ -4392,6 +4431,7 @@ static void transferARCOwnership(TypeProcessingState &state, case DeclaratorChunk::Function: case DeclaratorChunk::MemberPointer: + case DeclaratorChunk::Pipe: return; } } @@ -4682,6 +4722,14 @@ namespace { } } + void VisitPipeTypeLoc(PipeTypeLoc TL) { + TL.setKWLoc(DS.getTypeSpecTypeLoc()); + + TypeSourceInfo *TInfo = 0; + Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); + TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc()); + } + void VisitTypeLoc(TypeLoc TL) { // FIXME: add other typespec types and change this to an assert. 
TL.initialize(Context, DS.getTypeSpecTypeLoc()); @@ -4802,6 +4850,10 @@ namespace { TL.setLParenLoc(Chunk.Loc); TL.setRParenLoc(Chunk.EndLoc); } + void VisitPipeTypeLoc(PipeTypeLoc TL) { + assert(Chunk.Kind == DeclaratorChunk::Pipe); + TL.setKWLoc(Chunk.Loc); + } void VisitTypeLoc(TypeLoc TL) { llvm_unreachable("unsupported TypeLoc kind in declarator!"); @@ -4815,6 +4867,7 @@ static void fillAtomicQualLoc(AtomicTypeLoc ATL, const DeclaratorChunk &Chunk) { case DeclaratorChunk::Function: case DeclaratorChunk::Array: case DeclaratorChunk::Paren: + case DeclaratorChunk::Pipe: llvm_unreachable("cannot be _Atomic qualified"); case DeclaratorChunk::Pointer: @@ -5738,6 +5791,7 @@ static bool distributeNullabilityTypeAttr(TypeProcessingState &state, // Don't walk through these. case DeclaratorChunk::Reference: + case DeclaratorChunk::Pipe: return false; } } diff --git a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h index e0a9653..935304f 100644 --- a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h +++ b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h @@ -1046,6 +1046,9 @@ public: /// Subclasses may override this routine to provide different behavior. QualType RebuildAtomicType(QualType ValueType, SourceLocation KWLoc); + /// \brief Build a new pipe type given its value type. + QualType RebuildPipeType(QualType ValueType, SourceLocation KWLoc); + /// \brief Build a new template name given a nested name specifier, a flag /// indicating whether the "template" keyword was provided, and the template /// that the template name refers to. @@ -3580,7 +3583,7 @@ void TreeTransform<Derived>::InventTemplateArgumentLoc( case TemplateArgument::Template: case TemplateArgument::TemplateExpansion: { NestedNameSpecifierLocBuilder Builder; - TemplateName Template = Arg.getAsTemplate(); + TemplateName Template = Arg.getAsTemplateOrTemplatePattern(); if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) Builder.MakeTrivial(SemaRef.Context, DTN->getQualifier(), Loc); else if (QualifiedTemplateName *QTN = Template.getAsQualifiedTemplateName()) @@ -5324,6 +5327,26 @@ QualType TreeTransform<Derived>::TransformAtomicType(TypeLocBuilder &TLB, return Result; } +template <typename Derived> +QualType TreeTransform<Derived>::TransformPipeType(TypeLocBuilder &TLB, + PipeTypeLoc TL) { + QualType ValueType = getDerived().TransformType(TLB, TL.getValueLoc()); + if (ValueType.isNull()) + return QualType(); + + QualType Result = TL.getType(); + if (getDerived().AlwaysRebuild() || ValueType != TL.getValueLoc().getType()) { + Result = getDerived().RebuildPipeType(ValueType, TL.getKWLoc()); + if (Result.isNull()) + return QualType(); + } + + PipeTypeLoc NewTL = TLB.push<PipeTypeLoc>(Result); + NewTL.setKWLoc(TL.getKWLoc()); + + return Result; +} + /// \brief Simple iterator that traverses the template arguments in a /// container that provides a \c getArgLoc() member function. 
/// @@ -6128,7 +6151,7 @@ TreeTransform<Derived>::TransformIfStmt(IfStmt *S) { } } - Sema::FullExprArg FullCond(getSema().MakeFullExpr(Cond.get())); + Sema::FullExprArg FullCond(getSema().MakeFullExpr(Cond.get(), S->getIfLoc())); if (!S->getConditionVariable() && S->getCond() && !FullCond.get()) return StmtError(); @@ -6223,7 +6246,8 @@ TreeTransform<Derived>::TransformWhileStmt(WhileStmt *S) { } } - Sema::FullExprArg FullCond(getSema().MakeFullExpr(Cond.get())); + Sema::FullExprArg FullCond( + getSema().MakeFullExpr(Cond.get(), S->getWhileLoc())); if (!S->getConditionVariable() && S->getCond() && !FullCond.get()) return StmtError(); @@ -6307,7 +6331,8 @@ TreeTransform<Derived>::TransformForStmt(ForStmt *S) { } } - Sema::FullExprArg FullCond(getSema().MakeFullExpr(Cond.get())); + Sema::FullExprArg FullCond( + getSema().MakeFullExpr(Cond.get(), S->getForLoc())); if (!S->getConditionVariable() && S->getCond() && !FullCond.get()) return StmtError(); @@ -11348,6 +11373,12 @@ QualType TreeTransform<Derived>::RebuildAtomicType(QualType ValueType, } template<typename Derived> +QualType TreeTransform<Derived>::RebuildPipeType(QualType ValueType, + SourceLocation KWLoc) { + return SemaRef.BuildPipeType(ValueType, KWLoc); +} + +template<typename Derived> TemplateName TreeTransform<Derived>::RebuildTemplateName(CXXScopeSpec &SS, bool TemplateKW, diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h b/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h index e59bc89..64f583c 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h @@ -29,6 +29,7 @@ enum DeclUpdateKind { UPD_CXX_ADDED_FUNCTION_DEFINITION, UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER, UPD_CXX_INSTANTIATED_CLASS_DEFINITION, + UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT, UPD_CXX_RESOLVED_DTOR_DELETE, UPD_CXX_RESOLVED_EXCEPTION_SPEC, UPD_CXX_DEDUCED_RETURN_TYPE, diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp index a279475..833ff57 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp @@ -5640,6 +5640,17 @@ QualType ASTReader::readTypeRecord(unsigned Index) { QualType ValueType = readType(*Loc.F, Record, Idx); return Context.getAtomicType(ValueType); } + + case TYPE_PIPE: { + if (Record.size() != 1) { + Error("Incorrect encoding of pipe type"); + return QualType(); + } + + // Reading the pipe element type. 
+ QualType ElementType = readType(*Loc.F, Record, Idx); + return Context.getPipeType(ElementType); + } } llvm_unreachable("Invalid TypeCode!"); } @@ -5911,6 +5922,9 @@ void TypeLocReader::VisitAtomicTypeLoc(AtomicTypeLoc TL) { TL.setLParenLoc(ReadSourceLocation(Record, Idx)); TL.setRParenLoc(ReadSourceLocation(Record, Idx)); } +void TypeLocReader::VisitPipeTypeLoc(PipeTypeLoc TL) { + TL.setKWLoc(ReadSourceLocation(Record, Idx)); +} TypeSourceInfo *ASTReader::GetTypeSourceInfo(ModuleFile &F, const RecordData &Record, diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp index 8fb110e..5bf95f8 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp @@ -3626,6 +3626,21 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile, Reader.ReadSourceLocation(ModuleFile, Record, Idx)); break; + case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: { + auto Param = cast<ParmVarDecl>(D); + + // We have to read the default argument regardless of whether we use it + // so that hypothetical further update records aren't messed up. + // TODO: Add a function to skip over the next expr record. + auto DefaultArg = Reader.ReadExpr(F); + + // Only apply the update if the parameter still has an uninstantiated + // default argument. + if (Param->hasUninstantiatedDefaultArg()) + Param->setDefaultArg(DefaultArg); + break; + } + case UPD_CXX_ADDED_FUNCTION_DEFINITION: { FunctionDecl *FD = cast<FunctionDecl>(D); if (Reader.PendingBodies[FD]) { diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp index bc678af..ad81ac8 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1364,10 +1364,7 @@ void ASTStmtReader::VisitCXXThrowExpr(CXXThrowExpr *E) { void ASTStmtReader::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { VisitExpr(E); - - assert((bool)Record[Idx] == E->Param.getInt() && "We messed up at creation ?"); - ++Idx; // HasOtherExprStored and SubExpr was handled during creation. 
- E->Param.setPointer(ReadDeclAs<ParmVarDecl>(Record, Idx)); + E->Param = ReadDeclAs<ParmVarDecl>(Record, Idx); E->Loc = ReadSourceLocation(Record, Idx); } @@ -3205,16 +3202,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { case EXPR_CXX_THROW: S = new (Context) CXXThrowExpr(Empty); break; - case EXPR_CXX_DEFAULT_ARG: { - bool HasOtherExprStored = Record[ASTStmtReader::NumExprFields]; - if (HasOtherExprStored) { - Expr *SubExpr = ReadSubExpr(); - S = CXXDefaultArgExpr::Create(Context, SourceLocation(), nullptr, - SubExpr); - } else - S = new (Context) CXXDefaultArgExpr(Empty); + case EXPR_CXX_DEFAULT_ARG: + S = new (Context) CXXDefaultArgExpr(Empty); break; - } case EXPR_CXX_DEFAULT_INIT: S = new (Context) CXXDefaultInitExpr(Empty); break; diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp index 0f50d7a..ec04cd6 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp @@ -446,6 +446,12 @@ ASTTypeWriter::VisitAtomicType(const AtomicType *T) { Code = TYPE_ATOMIC; } +void +ASTTypeWriter::VisitPipeType(const PipeType *T) { + Writer.AddTypeRef(T->getElementType(), Record); + Code = TYPE_PIPE; +} + namespace { class TypeLocWriter : public TypeLocVisitor<TypeLocWriter> { @@ -672,6 +678,9 @@ void TypeLocWriter::VisitAtomicTypeLoc(AtomicTypeLoc TL) { Writer.AddSourceLocation(TL.getLParenLoc(), Record); Writer.AddSourceLocation(TL.getRParenLoc(), Record); } +void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) { + Writer.AddSourceLocation(TL.getKWLoc(), Record); +} void ASTWriter::WriteTypeAbbrevs() { using namespace llvm; @@ -4611,6 +4620,11 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { AddSourceLocation(Update.getLoc(), Record); break; + case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: + AddStmt(const_cast<Expr*>( + cast<ParmVarDecl>(Update.getDecl())->getDefaultArg())); + break; + case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: { auto *RD = cast<CXXRecordDecl>(D); UpdatedDeclContexts.insert(RD->getPrimaryContext()); @@ -5779,6 +5793,15 @@ void ASTWriter::StaticDataMemberInstantiated(const VarDecl *D) { D->getMemberSpecializationInfo()->getPointOfInstantiation())); } +void ASTWriter::DefaultArgumentInstantiated(const ParmVarDecl *D) { + assert(!WritingAST && "Already writing the AST!"); + if (!D->isFromASTFile()) + return; + + DeclUpdates[D].push_back( + DeclUpdate(UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT, D)); +} + void ASTWriter::AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD, const ObjCInterfaceDecl *IFD) { assert(!WritingAST && "Already writing the AST!"); diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp index 20ca6d6..54bba28 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2033,7 +2033,7 @@ void ASTWriter::WriteDeclAbbrevs() { //Character Literal Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getValue Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location - Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // getKind + Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // getKind CharacterLiteralAbbrev = Stream.EmitAbbrev(Abv); // Abbreviation for EXPR_IMPLICIT_CAST diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp 
index e52ed05..000a218 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1336,15 +1336,8 @@ void ASTStmtWriter::VisitCXXThrowExpr(CXXThrowExpr *E) { void ASTStmtWriter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { VisitExpr(E); - - bool HasOtherExprStored = E->Param.getInt(); - // Store these first, the reader reads them before creation. - Record.push_back(HasOtherExprStored); - if (HasOtherExprStored) - Writer.AddStmt(E->getExpr()); Writer.AddDeclRef(E->getParam(), Record); Writer.AddSourceLocation(E->getUsedLocation(), Record); - Code = serialization::EXPR_CXX_DEFAULT_ARG; } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 5d78d9b..1753744 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -948,15 +948,15 @@ bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR); switch (MR->getKind()) { - case MemRegion::FunctionTextRegionKind: { - const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl(); + case MemRegion::FunctionCodeRegionKind: { + const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl(); if (FD) os << "the address of the function '" << *FD << '\''; else os << "the address of a function"; return true; } - case MemRegion::BlockTextRegionKind: + case MemRegion::BlockCodeRegionKind: os << "block text"; return true; case MemRegion::BlockDataRegionKind: diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index ce2c194..fee030f 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -1513,15 +1513,15 @@ bool MallocChecker::SummarizeValue(raw_ostream &os, SVal V) { bool MallocChecker::SummarizeRegion(raw_ostream &os, const MemRegion *MR) { switch (MR->getKind()) { - case MemRegion::FunctionTextRegionKind: { - const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl(); + case MemRegion::FunctionCodeRegionKind: { + const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl(); if (FD) os << "the address of the function '" << *FD << '\''; else os << "the address of a function"; return true; } - case MemRegion::BlockTextRegionKind: + case MemRegion::BlockCodeRegionKind: os << "block text"; return true; case MemRegion::BlockDataRegionKind: diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index a5b5871..175225b 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -316,7 +316,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, case CK_ArrayToPointerDecay: case CK_BitCast: case CK_AddressSpaceConversion: - case CK_IntegralCast: + case CK_BooleanToSignedIntegral: case CK_NullToPointer: case CK_IntegralToPointer: case CK_PointerToIntegral: @@ -345,6 +345,17 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, // Delegate to SValBuilder to process. 
SVal V = state->getSVal(Ex, LCtx); V = svalBuilder.evalCast(V, T, ExTy); + // Negate the result if we're treating the boolean as a signed i1 + if (CastE->getCastKind() == CK_BooleanToSignedIntegral) + V = evalMinus(V); + state = state->BindExpr(CastE, LCtx, V); + Bldr.generateNode(CastE, Pred, state); + continue; + } + case CK_IntegralCast: { + // Delegate to SValBuilder to process. + SVal V = state->getSVal(Ex, LCtx); + V = svalBuilder.evalIntegralCast(state, V, T, ExTy); state = state->BindExpr(CastE, LCtx, V); Bldr.generateNode(CastE, Pred, state); continue; diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index ad3f396..30052cc 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -245,7 +245,7 @@ QualType CXXBaseObjectRegion::getValueType() const { // FoldingSet profiling. //===----------------------------------------------------------------------===// -void MemSpaceRegion::Profile(llvm::FoldingSetNodeID& ID) const { +void MemSpaceRegion::Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger((unsigned)getKind()); } @@ -357,31 +357,31 @@ void ElementRegion::Profile(llvm::FoldingSetNodeID& ID) const { ElementRegion::ProfileRegion(ID, ElementType, Index, superRegion); } -void FunctionTextRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, +void FunctionCodeRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, const NamedDecl *FD, const MemRegion*) { - ID.AddInteger(MemRegion::FunctionTextRegionKind); + ID.AddInteger(MemRegion::FunctionCodeRegionKind); ID.AddPointer(FD); } -void FunctionTextRegion::Profile(llvm::FoldingSetNodeID& ID) const { - FunctionTextRegion::ProfileRegion(ID, FD, superRegion); +void FunctionCodeRegion::Profile(llvm::FoldingSetNodeID& ID) const { + FunctionCodeRegion::ProfileRegion(ID, FD, superRegion); } -void BlockTextRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, +void BlockCodeRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, const BlockDecl *BD, CanQualType, const AnalysisDeclContext *AC, const MemRegion*) { - ID.AddInteger(MemRegion::BlockTextRegionKind); + ID.AddInteger(MemRegion::BlockCodeRegionKind); ID.AddPointer(BD); } -void BlockTextRegion::Profile(llvm::FoldingSetNodeID& ID) const { - BlockTextRegion::ProfileRegion(ID, BD, locTy, AC, superRegion); +void BlockCodeRegion::Profile(llvm::FoldingSetNodeID& ID) const { + BlockCodeRegion::ProfileRegion(ID, BD, locTy, AC, superRegion); } void BlockDataRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, - const BlockTextRegion *BC, + const BlockCodeRegion *BC, const LocationContext *LC, unsigned BlkCount, const MemRegion *sReg) { @@ -457,11 +457,11 @@ void AllocaRegion::dumpToStream(raw_ostream &os) const { os << "alloca{" << (const void*) Ex << ',' << Cnt << '}'; } -void FunctionTextRegion::dumpToStream(raw_ostream &os) const { +void FunctionCodeRegion::dumpToStream(raw_ostream &os) const { os << "code{" << getDecl()->getDeclName().getAsString() << '}'; } -void BlockTextRegion::dumpToStream(raw_ostream &os) const { +void BlockCodeRegion::dumpToStream(raw_ostream &os) const { os << "block_code{" << (const void*) this << '}'; } @@ -533,6 +533,10 @@ void RegionRawOffset::dumpToStream(raw_ostream &os) const { os << "raw_offset{" << getRegion() << ',' << getOffset().getQuantity() << '}'; } +void CodeSpaceRegion::dumpToStream(raw_ostream &os) const { + os << "CodeSpaceRegion"; +} + void StaticGlobalSpaceRegion::dumpToStream(raw_ostream 
&os) const { os << "StaticGlobalsMemSpace{" << CR << '}'; } @@ -711,11 +715,11 @@ const HeapSpaceRegion *MemRegionManager::getHeapRegion() { return LazyAllocate(heap); } -const MemSpaceRegion *MemRegionManager::getUnknownRegion() { +const UnknownSpaceRegion *MemRegionManager::getUnknownRegion() { return LazyAllocate(unknown); } -const MemSpaceRegion *MemRegionManager::getCodeRegion() { +const CodeSpaceRegion *MemRegionManager::getCodeRegion() { return LazyAllocate(code); } @@ -815,11 +819,11 @@ const VarRegion* MemRegionManager::getVarRegion(const VarDecl *D, const Decl *STCD = STC->getDecl(); if (isa<FunctionDecl>(STCD) || isa<ObjCMethodDecl>(STCD)) sReg = getGlobalsRegion(MemRegion::StaticGlobalSpaceRegionKind, - getFunctionTextRegion(cast<NamedDecl>(STCD))); + getFunctionCodeRegion(cast<NamedDecl>(STCD))); else if (const BlockDecl *BD = dyn_cast<BlockDecl>(STCD)) { // FIXME: The fallback type here is totally bogus -- though it should // never be queried, it will prevent uniquing with the real - // BlockTextRegion. Ideally we'd fix the AST so that we always had a + // BlockCodeRegion. Ideally we'd fix the AST so that we always had a // signature. QualType T; if (const TypeSourceInfo *TSI = BD->getSignatureAsWritten()) @@ -830,8 +834,8 @@ const VarRegion* MemRegionManager::getVarRegion(const VarDecl *D, T = getContext().getFunctionNoProtoType(T); T = getContext().getBlockPointerType(T); - const BlockTextRegion *BTR = - getBlockTextRegion(BD, C.getCanonicalType(T), + const BlockCodeRegion *BTR = + getBlockCodeRegion(BD, C.getCanonicalType(T), STC->getAnalysisDeclContext()); sReg = getGlobalsRegion(MemRegion::StaticGlobalSpaceRegionKind, BTR); @@ -852,7 +856,7 @@ const VarRegion *MemRegionManager::getVarRegion(const VarDecl *D, } const BlockDataRegion * -MemRegionManager::getBlockDataRegion(const BlockTextRegion *BC, +MemRegionManager::getBlockDataRegion(const BlockCodeRegion *BC, const LocationContext *LC, unsigned blockCount) { const MemRegion *sReg = nullptr; @@ -925,15 +929,15 @@ MemRegionManager::getElementRegion(QualType elementType, NonLoc Idx, return R; } -const FunctionTextRegion * -MemRegionManager::getFunctionTextRegion(const NamedDecl *FD) { - return getSubRegion<FunctionTextRegion>(FD, getCodeRegion()); +const FunctionCodeRegion * +MemRegionManager::getFunctionCodeRegion(const NamedDecl *FD) { + return getSubRegion<FunctionCodeRegion>(FD, getCodeRegion()); } -const BlockTextRegion * -MemRegionManager::getBlockTextRegion(const BlockDecl *BD, CanQualType locTy, +const BlockCodeRegion * +MemRegionManager::getBlockCodeRegion(const BlockDecl *BD, CanQualType locTy, AnalysisDeclContext *AC) { - return getSubRegion<BlockTextRegion>(BD, locTy, AC, getCodeRegion()); + return getSubRegion<BlockCodeRegion>(BD, locTy, AC, getCodeRegion()); } @@ -1196,7 +1200,7 @@ RegionOffset MemRegion::getAsOffset() const { while (1) { switch (R->getKind()) { - case GenericMemSpaceRegionKind: + case CodeSpaceRegionKind: case StackLocalsSpaceRegionKind: case StackArgumentsSpaceRegionKind: case HeapSpaceRegionKind: @@ -1209,8 +1213,8 @@ RegionOffset MemRegion::getAsOffset() const { assert(Offset == 0 && !SymbolicOffsetBase); goto Finish; - case FunctionTextRegionKind: - case BlockTextRegionKind: + case FunctionCodeRegionKind: + case BlockCodeRegionKind: case BlockDataRegionKind: // These will never have bindings, but may end up having values requested // if the user does some strange casting. 
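The "strange casting" mentioned above is user code along these lines (hypothetical, and not something with defined behavior when executed):

    void f(void);
    int peek(void) {
      // Requests a value inside f's FunctionCodeRegion, which has no bindings;
      // the region still has to answer offset queries gracefully.
      return *(const int *)&f;
    }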
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp index 4f9ad9e..100fa75 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -536,19 +536,19 @@ bool ScanReachableSymbols::scan(const SymExpr *sym) { // TODO: should be rewritten using SymExpr::symbol_iterator. switch (sym->getKind()) { - case SymExpr::RegionValueKind: - case SymExpr::ConjuredKind: - case SymExpr::DerivedKind: - case SymExpr::ExtentKind: - case SymExpr::MetadataKind: + case SymExpr::SymbolRegionValueKind: + case SymExpr::SymbolConjuredKind: + case SymExpr::SymbolDerivedKind: + case SymExpr::SymbolExtentKind: + case SymExpr::SymbolMetadataKind: break; - case SymExpr::CastSymbolKind: + case SymExpr::SymbolCastKind: return scan(cast<SymbolCast>(sym)->getOperand()); - case SymExpr::SymIntKind: + case SymExpr::SymIntExprKind: return scan(cast<SymIntExpr>(sym)->getLHS()); - case SymExpr::IntSymKind: + case SymExpr::IntSymExprKind: return scan(cast<IntSymExpr>(sym)->getRHS()); - case SymExpr::SymSymKind: { + case SymExpr::SymSymExprKind: { const SymSymExpr *x = cast<SymSymExpr>(sym); return scan(x->getLHS()) && scan(x->getRHS()); } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 0a2b2e6..77b0ad3 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -171,7 +171,7 @@ private: case APSIntType::RTR_Below: // The entire range is outside the symbol's set of possible values. // If this is a conventionally-ordered range, the state is infeasible. - if (Lower < Upper) + if (Lower <= Upper) return false; // However, if the range wraps around, it spans all possible values. @@ -222,7 +222,7 @@ private: case APSIntType::RTR_Above: // The entire range is outside the symbol's set of possible values. // If this is a conventionally-ordered range, the state is infeasible. - if (Lower < Upper) + if (Lower <= Upper) return false; // However, if the range wraps around, it spans all possible values. diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index cdae040..1831522 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -214,15 +214,15 @@ SValBuilder::getDerivedRegionValueSymbolVal(SymbolRef parentSymbol, } DefinedSVal SValBuilder::getFunctionPointer(const FunctionDecl *func) { - return loc::MemRegionVal(MemMgr.getFunctionTextRegion(func)); + return loc::MemRegionVal(MemMgr.getFunctionCodeRegion(func)); } DefinedSVal SValBuilder::getBlockPointer(const BlockDecl *block, CanQualType locTy, const LocationContext *locContext, unsigned blockCount) { - const BlockTextRegion *BC = - MemMgr.getBlockTextRegion(block, locTy, locContext->getAnalysisDeclContext()); + const BlockCodeRegion *BC = + MemMgr.getBlockCodeRegion(block, locTy, locContext->getAnalysisDeclContext()); const BlockDataRegion *BD = MemMgr.getBlockDataRegion(BC, locContext, blockCount); return loc::MemRegionVal(BD); @@ -423,6 +423,45 @@ static bool shouldBeModeledWithNoOp(ASTContext &Context, QualType ToTy, return true; } +// Handles casts of type CK_IntegralCast. 
+// At the moment, this function will redirect to evalCast, except when the range +// of the original value is known to be greater than the max of the target type. +SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val, + QualType castTy, QualType originalTy) { + + // No truncations if target type is big enough. + if (getContext().getTypeSize(castTy) >= getContext().getTypeSize(originalTy)) + return evalCast(val, castTy, originalTy); + + const SymExpr *se = val.getAsSymbolicExpression(); + if (!se) // Let evalCast handle non-symbolic expressions. + return evalCast(val, castTy, originalTy); + + // Find the maximum value of the target type. + APSIntType ToType(getContext().getTypeSize(castTy), + castTy->isUnsignedIntegerType()); + llvm::APSInt ToTypeMax = ToType.getMaxValue(); + NonLoc ToTypeMaxVal = + makeIntVal(ToTypeMax.isUnsigned() ? ToTypeMax.getZExtValue() + : ToTypeMax.getSExtValue(), + castTy) + .castAs<NonLoc>(); + // Check the range of the symbol being cast against the maximum value of the + // target type. + NonLoc FromVal = val.castAs<NonLoc>(); + QualType CmpTy = getConditionType(); + NonLoc CompVal = + evalBinOpNN(state, BO_LT, FromVal, ToTypeMaxVal, CmpTy).castAs<NonLoc>(); + ProgramStateRef IsNotTruncated, IsTruncated; + std::tie(IsNotTruncated, IsTruncated) = state->assume(CompVal); + if (!IsNotTruncated && IsTruncated) { + // Symbol is truncated so we evaluate it as a cast. + NonLoc CastVal = makeNonLoc(se, originalTy, castTy); + return CastVal; + } + return evalCast(val, castTy, originalTy); +} + // FIXME: should rewrite according to the cast kind. SVal SValBuilder::evalCast(SVal val, QualType castTy, QualType originalTy) { castTy = Context.getCanonicalType(castTy); diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp index 8de939f..dffee6c 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -51,7 +51,7 @@ bool SVal::hasConjuredSymbol() const { const FunctionDecl *SVal::getAsFunctionDecl() const { if (Optional<loc::MemRegionVal> X = getAs<loc::MemRegionVal>()) { const MemRegion* R = X->getRegion(); - if (const FunctionTextRegion *CTR = R->getAs<FunctionTextRegion>()) + if (const FunctionCodeRegion *CTR = R->getAs<FunctionCodeRegion>()) if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(CTR->getDecl())) return FD; } @@ -240,7 +240,7 @@ void SVal::dump() const { dumpToStream(llvm::errs()); } void SVal::dumpToStream(raw_ostream &os) const { switch (getBaseKind()) { - case UnknownKind: + case UnknownValKind: os << "Unknown"; break; case NonLocKind: @@ -249,7 +249,7 @@ void SVal::dumpToStream(raw_ostream &os) const { case LocKind: castAs<Loc>().dumpToStream(os); break; - case UndefinedKind: + case UndefinedValKind: os << "Undefined"; break; } @@ -313,7 +313,7 @@ void Loc::dumpToStream(raw_ostream &os) const { case loc::GotoLabelKind: os << "&&" << castAs<loc::GotoLabel>().getLabel()->getName(); break; - case loc::MemRegionKind: + case loc::MemRegionValKind: os << '&' << castAs<loc::MemRegionVal>().getRegion()->getString(); break; default: diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index a704ce2..72b852b 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -141,9 +141,9 @@ SVal
SimpleSValBuilder::evalCastFromLoc(Loc val, QualType castTy) { // unless this is a weak function or a symbolic region. if (castTy->isBooleanType()) { switch (val.getSubKind()) { - case loc::MemRegionKind: { + case loc::MemRegionValKind: { const MemRegion *R = val.castAs<loc::MemRegionVal>().getRegion(); - if (const FunctionTextRegion *FTR = dyn_cast<FunctionTextRegion>(R)) + if (const FunctionCodeRegion *FTR = dyn_cast<FunctionCodeRegion>(R)) if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(FTR->getDecl())) if (FD->isWeak()) // FIXME: Currently we are using an extent symbol here, @@ -689,7 +689,7 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, // completely unknowable. return UnknownVal(); } - case loc::MemRegionKind: { + case loc::MemRegionValKind: { if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) { // If one of the operands is a symbol and the other is a constant, // build an expression for use by the constraint manager. @@ -718,7 +718,7 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, // Get both values as regions, if possible. const MemRegion *LeftMR = lhs.getAsRegion(); - assert(LeftMR && "MemRegionKind SVal doesn't have a region!"); + assert(LeftMR && "MemRegionValKind SVal doesn't have a region!"); const MemRegion *RightMR = rhs.getAsRegion(); if (!RightMR) diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp index 7cdb55a..de29f0e 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp @@ -100,7 +100,7 @@ const MemRegion *StoreManager::castRegion(const MemRegion *R, QualType CastToTy) // Process region cast according to the kind of the region being cast. switch (R->getKind()) { case MemRegion::CXXThisRegionKind: - case MemRegion::GenericMemSpaceRegionKind: + case MemRegion::CodeSpaceRegionKind: case MemRegion::StackLocalsSpaceRegionKind: case MemRegion::StackArgumentsSpaceRegionKind: case MemRegion::HeapSpaceRegionKind: @@ -112,8 +112,8 @@ const MemRegion *StoreManager::castRegion(const MemRegion *R, QualType CastToTy) llvm_unreachable("Invalid region cast"); } - case MemRegion::FunctionTextRegionKind: - case MemRegion::BlockTextRegionKind: + case MemRegion::FunctionCodeRegionKind: + case MemRegion::BlockCodeRegionKind: case MemRegion::BlockDataRegionKind: case MemRegion::StringRegionKind: // FIXME: Need to handle arbitrary downcasts. 
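The weak-function special case in SimpleSValBuilder::evalCastFromLoc above exists because a weak symbol may resolve to null at load time, so a weak function's address cannot be constant-folded to true. Illustrative:

    extern void maybe_present(void) __attribute__((weak));
    void call_if_linked(void) {
      if (maybe_present)   // genuinely unknown: null if the symbol is absent
        maybe_present();
    }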
@@ -393,7 +393,7 @@ SVal StoreManager::getLValueFieldOrIvar(const Decl *D, SVal Base) { const MemRegion* BaseR = nullptr; switch (BaseL.getSubKind()) { - case loc::MemRegionKind: + case loc::MemRegionValKind: BaseR = BaseL.castAs<loc::MemRegionVal>().getRegion(); break; diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index 99b2e14..2dd252c 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -115,22 +115,22 @@ void SymExpr::symbol_iterator::expand() { const SymExpr *SE = itr.pop_back_val(); switch (SE->getKind()) { - case SymExpr::RegionValueKind: - case SymExpr::ConjuredKind: - case SymExpr::DerivedKind: - case SymExpr::ExtentKind: - case SymExpr::MetadataKind: + case SymExpr::SymbolRegionValueKind: + case SymExpr::SymbolConjuredKind: + case SymExpr::SymbolDerivedKind: + case SymExpr::SymbolExtentKind: + case SymExpr::SymbolMetadataKind: return; - case SymExpr::CastSymbolKind: + case SymExpr::SymbolCastKind: itr.push_back(cast<SymbolCast>(SE)->getOperand()); return; - case SymExpr::SymIntKind: + case SymExpr::SymIntExprKind: itr.push_back(cast<SymIntExpr>(SE)->getLHS()); return; - case SymExpr::IntSymKind: + case SymExpr::IntSymExprKind: itr.push_back(cast<IntSymExpr>(SE)->getRHS()); return; - case SymExpr::SymSymKind: { + case SymExpr::SymSymExprKind: { const SymSymExpr *x = cast<SymSymExpr>(SE); itr.push_back(x->getLHS()); itr.push_back(x->getRHS()); @@ -458,35 +458,35 @@ bool SymbolReaper::isLive(SymbolRef sym) { bool KnownLive; switch (sym->getKind()) { - case SymExpr::RegionValueKind: + case SymExpr::SymbolRegionValueKind: KnownLive = isLiveRegion(cast<SymbolRegionValue>(sym)->getRegion()); break; - case SymExpr::ConjuredKind: + case SymExpr::SymbolConjuredKind: KnownLive = false; break; - case SymExpr::DerivedKind: + case SymExpr::SymbolDerivedKind: KnownLive = isLive(cast<SymbolDerived>(sym)->getParentSymbol()); break; - case SymExpr::ExtentKind: + case SymExpr::SymbolExtentKind: KnownLive = isLiveRegion(cast<SymbolExtent>(sym)->getRegion()); break; - case SymExpr::MetadataKind: + case SymExpr::SymbolMetadataKind: KnownLive = MetadataInUse.count(sym) && isLiveRegion(cast<SymbolMetadata>(sym)->getRegion()); if (KnownLive) MetadataInUse.erase(sym); break; - case SymExpr::SymIntKind: + case SymExpr::SymIntExprKind: KnownLive = isLive(cast<SymIntExpr>(sym)->getLHS()); break; - case SymExpr::IntSymKind: + case SymExpr::IntSymExprKind: KnownLive = isLive(cast<IntSymExpr>(sym)->getRHS()); break; - case SymExpr::SymSymKind: + case SymExpr::SymSymExprKind: KnownLive = isLive(cast<SymSymExpr>(sym)->getLHS()) && isLive(cast<SymSymExpr>(sym)->getRHS()); break; - case SymExpr::CastSymbolKind: + case SymExpr::SymbolCastKind: KnownLive = isLive(cast<SymbolCast>(sym)->getOperand()); break; } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp index bf85c4c..d144685 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp @@ -496,10 +496,11 @@ void AnalysisConsumer::HandleDeclsCallGraph(const unsigned LocalTUDeclsSize) { (Mgr->options.InliningMode == All ? nullptr : &VisitedCallees)); // Add the visited callees to the global visited set. 
- for (SetOfConstDecls::iterator I = VisitedCallees.begin(), - E = VisitedCallees.end(); I != E; ++I) { - Visited.insert(*I); - } + for (const Decl *Callee : VisitedCallees) + // Decls from CallGraph are already canonical. But Decls coming from + // CallExprs may not be. We should canonicalize them manually. + Visited.insert(isa<ObjCMethodDecl>(Callee) ? Callee + : Callee->getCanonicalDecl()); VisitedAsTopLevel.insert(D); } } diff --git a/contrib/llvm/tools/lldb/include/lldb/Core/LoadedModuleInfoList.h b/contrib/llvm/tools/lldb/include/lldb/Core/LoadedModuleInfoList.h new file mode 100644 index 0000000..6ba5c28 --- /dev/null +++ b/contrib/llvm/tools/lldb/include/lldb/Core/LoadedModuleInfoList.h @@ -0,0 +1,152 @@ +//===-- LoadedModuleInfoList.h ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef liblldb_LoadedModuleInfoList_h_ +#define liblldb_LoadedModuleInfoList_h_ + +// C Includes + +// C++ Includes +#include <vector> + +// Other libraries and framework includes +#include "lldb/lldb-private-forward.h" + +namespace lldb_private { +class LoadedModuleInfoList +{ +public: + + class LoadedModuleInfo + { + public: + + enum e_data_point + { + e_has_name = 0, + e_has_base , + e_has_dynamic , + e_has_link_map , + e_num + }; + + LoadedModuleInfo () + { + for (uint32_t i = 0; i < e_num; ++i) + m_has[i] = false; + }; + + void set_name (const std::string & name) + { + m_name = name; + m_has[e_has_name] = true; + } + bool get_name (std::string & out) const + { + out = m_name; + return m_has[e_has_name]; + } + + void set_base (const lldb::addr_t base) + { + m_base = base; + m_has[e_has_base] = true; + } + bool get_base (lldb::addr_t & out) const + { + out = m_base; + return m_has[e_has_base]; + } + + void set_base_is_offset (bool is_offset) + { + m_base_is_offset = is_offset; + } + bool get_base_is_offset(bool & out) const + { + out = m_base_is_offset; + return m_has[e_has_base]; + } + + void set_link_map (const lldb::addr_t addr) + { + m_link_map = addr; + m_has[e_has_link_map] = true; + } + bool get_link_map (lldb::addr_t & out) const + { + out = m_link_map; + return m_has[e_has_link_map]; + } + + void set_dynamic (const lldb::addr_t addr) + { + m_dynamic = addr; + m_has[e_has_dynamic] = true; + } + bool get_dynamic (lldb::addr_t & out) const + { + out = m_dynamic; + return m_has[e_has_dynamic]; + } + + bool has_info (e_data_point datum) const + { + assert (datum < e_num); + return m_has[datum]; + } + + bool + operator == (LoadedModuleInfo const &rhs) const + { + if (e_num != rhs.e_num) + return false; + + for (size_t i = 0; i < e_num; ++i) + { + if (m_has[i] != rhs.m_has[i]) + return false; + } + + return (m_base == rhs.m_base) && + (m_link_map == rhs.m_link_map) && + (m_dynamic == rhs.m_dynamic) && + (m_name == rhs.m_name); + } + protected: + + bool m_has[e_num]; + std::string m_name; + lldb::addr_t m_link_map; + lldb::addr_t m_base; + bool m_base_is_offset; + lldb::addr_t m_dynamic; + }; + + LoadedModuleInfoList () + : m_list () + , m_link_map (LLDB_INVALID_ADDRESS) + {} + + void add (const LoadedModuleInfo & mod) + { + m_list.push_back (mod); + } + + void clear () + { + m_list.clear (); + } + + std::vector<LoadedModuleInfo> m_list; + lldb::addr_t m_link_map; +}; +} // namespace lldb_private + +#endif // liblldb_LoadedModuleInfoList_h_ diff --git
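Note: the LoadedModuleInfoList header introduced above is a presence-tracked record type: each setter flips the matching m_has[] flag, and each getter returns that flag so callers can distinguish "never set" from a zero value. A brief usage sketch under those assumptions (illustrative only, not code from this change):

    #include "lldb/Core/LoadedModuleInfoList.h"

    void example()
    {
        lldb_private::LoadedModuleInfoList list;
        lldb_private::LoadedModuleInfoList::LoadedModuleInfo mod;

        mod.set_name("libfoo.so");   // hypothetical module
        mod.set_base(0x40000000);    // load address from the remote stub
        list.add(mod);

        lldb::addr_t base;
        if (mod.get_base(base))      // true: set_base() ran above
        {
            // base == 0x40000000 here
        }

        lldb::addr_t link_map;
        bool known = mod.get_link_map(link_map);  // false: never set
        (void)known;
    }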
a/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandReturnObject.h b/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandReturnObject.h index 424ac80..e3a1d9f 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandReturnObject.h +++ b/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandReturnObject.h @@ -169,6 +169,18 @@ public: void SetInteractive (bool b); + + bool + GetAbnormalStopWasExpected() const + { + return m_abnormal_stop_was_expected; + } + + void + SetAbnormalStopWasExpected(bool signal_was_expected) + { + m_abnormal_stop_was_expected = signal_was_expected; + } private: enum @@ -182,7 +194,13 @@ private: lldb::ReturnStatus m_status; bool m_did_change_process_state; - bool m_interactive; // If true, then the input handle from the debugger will be hooked up + bool m_interactive; // If true, then the input handle from the debugger will be hooked up + bool m_abnormal_stop_was_expected; // This is to support eHandleCommandFlagStopOnCrash vs. attach. + // The attach command often ends up with the process stopped due to a signal. + // Normally that would mean stop on crash should halt batch execution, but we + // obviously don't want that for attach. Using this flag, the attach command + // (and anything else for which this is relevant) can say that the signal is + // expected, and batch command execution can continue. }; } // namespace lldb_private diff --git a/contrib/llvm/tools/lldb/include/lldb/Symbol/ClangASTContext.h b/contrib/llvm/tools/lldb/include/lldb/Symbol/ClangASTContext.h index 0314ce0..bd3a113 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Symbol/ClangASTContext.h +++ b/contrib/llvm/tools/lldb/include/lldb/Symbol/ClangASTContext.h @@ -573,6 +573,9 @@ public: ConstString DeclContextGetName (void *opaque_decl_ctx) override; + ConstString + DeclContextGetScopeQualifiedName (void *opaque_decl_ctx) override; + bool DeclContextIsClassMethod (void *opaque_decl_ctx, lldb::LanguageType *language_ptr, diff --git a/contrib/llvm/tools/lldb/include/lldb/Symbol/CompilerDeclContext.h b/contrib/llvm/tools/lldb/include/lldb/Symbol/CompilerDeclContext.h index 70399b2..9135b44 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Symbol/CompilerDeclContext.h +++ b/contrib/llvm/tools/lldb/include/lldb/Symbol/CompilerDeclContext.h @@ -128,6 +128,9 @@ public: ConstString GetName () const; + ConstString + GetScopeQualifiedName() const; + bool IsStructUnionOrClass () const; diff --git a/contrib/llvm/tools/lldb/include/lldb/Symbol/GoASTContext.h b/contrib/llvm/tools/lldb/include/lldb/Symbol/GoASTContext.h index 3de98da..09d79ba 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Symbol/GoASTContext.h +++ b/contrib/llvm/tools/lldb/include/lldb/Symbol/GoASTContext.h @@ -112,6 +112,12 @@ class GoASTContext : public TypeSystem return ConstString(); } + ConstString + DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) override + { + return ConstString(); + } + bool DeclContextIsClassMethod(void *opaque_decl_ctx, lldb::LanguageType *language_ptr, bool *is_instance_method_ptr, ConstString *language_object_name_ptr) override diff --git a/contrib/llvm/tools/lldb/include/lldb/Symbol/SymbolFile.h b/contrib/llvm/tools/lldb/include/lldb/Symbol/SymbolFile.h index e27b32d..fe74ad4 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Symbol/SymbolFile.h +++ b/contrib/llvm/tools/lldb/include/lldb/Symbol/SymbolFile.h @@ -144,6 +144,7 @@ public: virtual uint32_t FindTypes (const SymbolContext& sc, const ConstString &name, const CompilerDeclContext *parent_decl_ctx, bool append, uint32_t
max_matches, TypeMap& types); virtual size_t FindTypes (const std::vector<CompilerContext> &context, bool append, TypeMap& types); + virtual void GetMangledNamesForFunction(const std::string &scope_qualified_name, std::vector<ConstString> &mangled_names); // virtual uint32_t FindTypes (const SymbolContext& sc, const RegularExpression& regex, bool append, uint32_t max_matches, TypeList& types) = 0; virtual TypeList * GetTypeList (); virtual size_t GetTypes (lldb_private::SymbolContextScope *sc_scope, diff --git a/contrib/llvm/tools/lldb/include/lldb/Symbol/TypeSystem.h b/contrib/llvm/tools/lldb/include/lldb/Symbol/TypeSystem.h index d367bcd..9b43b9d 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Symbol/TypeSystem.h +++ b/contrib/llvm/tools/lldb/include/lldb/Symbol/TypeSystem.h @@ -151,6 +151,9 @@ public: virtual ConstString DeclContextGetName (void *opaque_decl_ctx) = 0; + virtual ConstString + DeclContextGetScopeQualifiedName (void *opaque_decl_ctx) = 0; + virtual bool DeclContextIsClassMethod (void *opaque_decl_ctx, lldb::LanguageType *language_ptr, diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/CPPLanguageRuntime.h b/contrib/llvm/tools/lldb/include/lldb/Target/CPPLanguageRuntime.h index ac537d0..788f4e6 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/CPPLanguageRuntime.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/CPPLanguageRuntime.h @@ -42,9 +42,6 @@ public: bool GetObjectDescription(Stream &str, Value &value, ExecutionContextScope *exe_scope) override; - virtual size_t - GetAlternateManglings(const ConstString &mangled, std::vector<ConstString> &alternates) = 0; - protected: //------------------------------------------------------------------ diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/Process.h b/contrib/llvm/tools/lldb/include/lldb/Target/Process.h index 2e063c5..6bb7a3d 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/Process.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/Process.h @@ -30,6 +30,7 @@ #include "lldb/Core/Communication.h" #include "lldb/Core/Error.h" #include "lldb/Core/Event.h" +#include "lldb/Core/LoadedModuleInfoList.h" #include "lldb/Core/ThreadSafeValue.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Core/StructuredData.h" @@ -1152,6 +1153,12 @@ public: return 0; } + virtual size_t + LoadModules (LoadedModuleInfoList &) + { + return 0; + } + protected: virtual JITLoaderList & GetJITLoaders (); @@ -3149,6 +3156,34 @@ public: void ResetImageToken(size_t token); + //------------------------------------------------------------------ + /// Find the next branch instruction to set a breakpoint on + /// + /// When instruction stepping through a source line, instead of + /// stepping through each instruction, we can put a breakpoint on + /// the next branch instruction (within the range of instructions + /// we are stepping through) and continue the process to there, + /// yielding significant performance benefits over instruction + /// stepping. + /// + /// @param[in] default_stop_addr + /// The address of the instruction where lldb would put a + /// breakpoint normally. + /// + /// @param[in] range_bounds + /// The range which the breakpoint must be contained within. + /// Typically a source line. + /// + /// @return + /// The address of the next branch instruction, or the end of + /// the range provided in range_bounds. If there are any + /// problems with the disassembly or getting the instructions, + /// the original default_stop_addr will be returned. 
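Note: the declaration for AdvanceAddressToNextBranchInstruction follows just below; as a caller-side sketch of the optimization the comment above describes (process, next_insn_addr, and line_address_range are assumed names, not code from this change):

    // While stepping a source line, run to the next branch inside the
    // line instead of single-stepping every instruction.
    Address stop_addr =
        process.AdvanceAddressToNextBranchInstruction(next_insn_addr,
                                                      line_address_range);
    // On any disassembly problem, next_insn_addr comes back unchanged,
    // so the caller can always set a one-shot breakpoint at stop_addr
    // and resume.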
+ //------------------------------------------------------------------ + Address + AdvanceAddressToNextBranchInstruction (Address default_stop_addr, + AddressRange range_bounds); + protected: void SetState (lldb::EventSP &event_sp); diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/SystemRuntime.h b/contrib/llvm/tools/lldb/include/lldb/Target/SystemRuntime.h index 54fde88..cefd724 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/SystemRuntime.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/SystemRuntime.h @@ -275,6 +275,23 @@ public: return LLDB_INVALID_ADDRESS; } + + //------------------------------------------------------------------ + /// Retrieve the Queue kind for the queue at a thread's dispatch_qaddr. + /// + /// Retrieve the Queue kind - either eQueueKindSerial or + /// eQueueKindConcurrent, indicating that this queue processes work + /// items serially or concurrently. + /// + /// @return + /// The Queue kind, if it could be read, else eQueueKindUnknown. + //------------------------------------------------------------------ + virtual lldb::QueueKind + GetQueueKind (lldb::addr_t dispatch_qaddr) + { + return lldb::eQueueKindUnknown; + } + //------------------------------------------------------------------ /// Get the pending work items for a libdispatch Queue /// diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/Thread.h b/contrib/llvm/tools/lldb/include/lldb/Target/Thread.h index 7aff77b..ba73e0b 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/Thread.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/Thread.h @@ -367,6 +367,35 @@ public: } //------------------------------------------------------------------ + /// Whether this thread can be associated with a libdispatch queue + /// + /// The Thread may know if it is associated with a libdispatch queue, + /// it may know definitively that it is NOT associated with a libdispatch + /// queue, or it may be unknown whether it is associated with a libdispatch + /// queue. + /// + /// @return + /// eLazyBoolNo if this thread is definitely not associated with a + /// libdispatch queue (e.g. on a non-Darwin system where GCD aka + /// libdispatch is not available). + /// + /// eLazyBoolYes this thread is associated with a libdispatch queue. + /// + /// eLazyBoolCalculate this thread may be associated with a libdispatch + /// queue but the thread doesn't know one way or the other. + //------------------------------------------------------------------ + virtual lldb_private::LazyBool + GetAssociatedWithLibdispatchQueue () + { + return eLazyBoolNo; + } + + virtual void + SetAssociatedWithLibdispatchQueue (lldb_private::LazyBool associated_with_libdispatch_queue) + { + } + + //------------------------------------------------------------------ /// Retrieve the Queue ID for the queue currently using this Thread /// /// If this Thread is doing work on behalf of a libdispatch/GCD queue, @@ -414,6 +443,29 @@ public: } //------------------------------------------------------------------ + /// Retrieve the Queue kind for the queue currently using this Thread + /// + /// If this Thread is doing work on behalf of a libdispatch/GCD queue, + /// retrieve the Queue kind - either eQueueKindSerial or + /// eQueueKindConcurrent, indicating that this queue processes work + /// items serially or concurrently. + /// + /// @return + /// The Queue kind, if the Thread subclass implements this, else + /// eQueueKindUnknown. 
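Note: the Thread::GetQueueKind declaration this comment documents follows just below; together with the SystemRuntime::GetQueueKind hook added above, the intended lookup order can be sketched as follows (the helper name and the fallback policy are assumptions for illustration):

    lldb::QueueKind
    GetEffectiveQueueKind(lldb_private::Thread &thread,
                          lldb_private::SystemRuntime *runtime)
    {
        // Prefer what the thread already knows, e.g. queue data cached
        // from the remote debug stub when the thread was created.
        lldb::QueueKind kind = thread.GetQueueKind();

        // Otherwise ask the SystemRuntime, keyed by the thread's
        // dispatch_qaddr.
        if (kind == lldb::eQueueKindUnknown && runtime)
            kind = runtime->GetQueueKind(
                thread.GetQueueLibdispatchQueueAddress());
        return kind;
    }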
+ //------------------------------------------------------------------ + virtual lldb::QueueKind + GetQueueKind () + { + return lldb::eQueueKindUnknown; + } + + virtual void + SetQueueKind (lldb::QueueKind kind) + { + } + + //------------------------------------------------------------------ /// Retrieve the Queue for this thread, if any. /// /// @return @@ -451,6 +503,30 @@ public: return LLDB_INVALID_ADDRESS; } + virtual void + SetQueueLibdispatchQueueAddress (lldb::addr_t dispatch_queue_t) + { + } + + //------------------------------------------------------------------ + /// Whether this Thread already has all the Queue information cached or not + /// + /// A Thread may be associated with a libdispatch work Queue at a given + /// public stop event. If so, the thread can satisfy requests like + /// GetQueueLibdispatchQueueAddress, GetQueueKind, GetQueueName, and GetQueueID + /// either from information from the remote debug stub when it is initially + /// created, or it can query the SystemRuntime for that information. + /// + /// This method allows the SystemRuntime to discover if a thread has this + /// information already, instead of calling the thread to get the information + /// and having the thread call the SystemRuntime again. + //------------------------------------------------------------------ + virtual bool + ThreadHasQueueInformation () const + { + return false; + } + virtual uint32_t GetStackFrameCount() { @@ -888,6 +964,16 @@ public: /// @param[in] run_vote /// See standard meanings for the stop & run votes in ThreadPlan.h. /// + /// @param[in] continue_to_next_branch + /// Normally this will enqueue a plan that will put a breakpoint on the return address and continue + /// to there. If continue_to_next_branch is true, this is an operation not involving the user -- + /// e.g. stepping "next" in a source line and we instruction stepped into another function -- + /// so instead of putting a breakpoint on the return address, advance the breakpoint to the + /// end of the source line that is doing the call, or until the next flow control instruction. + /// If the return value from the function call is to be retrieved / displayed to the user, you must stop + /// on the return address. The return value may be stored in volatile registers which are overwritten + /// before the next branch instruction. + /// /// @return /// A shared pointer to the newly queued thread plan, or nullptr if the plan could not be queued.
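Note: the amended QueueThreadPlanForStepOut signature follows just below; a usage sketch of the new flag (the leading arguments are abbreviated with assumed names, since this hunk does not show them):

    // A "next" over a call that accidentally instruction-stepped into the
    // callee: step back out, but continue to the end of the calling line
    // or the next flow-control instruction rather than stopping at the
    // raw return address (safe here because no return value is shown).
    lldb::ThreadPlanSP plan_sp = thread.QueueThreadPlanForStepOut(
        abort_other_plans, addr_context, first_insn,
        stop_other_threads, eVoteYes, eVoteNoOpinion,
        frame_idx, /*continue_to_next_branch=*/true);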
//------------------------------------------------------------------ @@ -898,7 +984,8 @@ public: bool stop_other_threads, Vote stop_vote, // = eVoteYes, Vote run_vote, // = eVoteNoOpinion); - uint32_t frame_idx); + uint32_t frame_idx, + bool continue_to_next_branch = false); //------------------------------------------------------------------ /// Gets the plan used to step through the code that steps from a function diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/ThreadPlanStepOut.h b/contrib/llvm/tools/lldb/include/lldb/Target/ThreadPlanStepOut.h index ac56963..ccf829f 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/ThreadPlanStepOut.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/ThreadPlanStepOut.h @@ -31,7 +31,8 @@ public: Vote stop_vote, Vote run_vote, uint32_t frame_idx, - LazyBool step_out_avoids_code_without_debug_info); + LazyBool step_out_avoids_code_without_debug_info, + bool continue_to_next_branch = false); ~ThreadPlanStepOut() override; diff --git a/contrib/llvm/tools/lldb/source/Commands/CommandObjectProcess.cpp b/contrib/llvm/tools/lldb/source/Commands/CommandObjectProcess.cpp index b7f894f..a85ea17 100644 --- a/contrib/llvm/tools/lldb/source/Commands/CommandObjectProcess.cpp +++ b/contrib/llvm/tools/lldb/source/Commands/CommandObjectProcess.cpp @@ -550,6 +550,7 @@ protected: result.AppendMessage(stream.GetData()); result.SetStatus (eReturnStatusSuccessFinishNoResult); result.SetDidChangeProcessState (true); + result.SetAbnormalStopWasExpected(true); } else { diff --git a/contrib/llvm/tools/lldb/source/Core/StringList.cpp b/contrib/llvm/tools/lldb/source/Core/StringList.cpp index ce197ac..98a0790 100644 --- a/contrib/llvm/tools/lldb/source/Core/StringList.cpp +++ b/contrib/llvm/tools/lldb/source/Core/StringList.cpp @@ -12,7 +12,6 @@ #include "lldb/Core/StreamString.h" #include "lldb/Host/FileSpec.h" #include "lldb/Core/Log.h" -#include "lldb/Core/StreamString.h" #include <string> diff --git a/contrib/llvm/tools/lldb/source/Host/common/HostInfoBase.cpp b/contrib/llvm/tools/lldb/source/Host/common/HostInfoBase.cpp index 0f4324f..f7ba755 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/HostInfoBase.cpp +++ b/contrib/llvm/tools/lldb/source/Host/common/HostInfoBase.cpp @@ -409,13 +409,13 @@ HostInfoBase::ComputeHostArchitectureSupport(ArchSpec &arch_32, ArchSpec &arch_6 arch_32.SetTriple(triple); break; + case llvm::Triple::aarch64: case llvm::Triple::ppc64: case llvm::Triple::x86_64: arch_64.SetTriple(triple); arch_32.SetTriple(triple.get32BitArchVariant()); break; - case llvm::Triple::aarch64: case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::sparcv9: diff --git a/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp b/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp index b7cc607..fd88f0d 100644 --- a/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp @@ -3038,7 +3038,10 @@ CommandInterpreter::IOHandlerInputComplete (IOHandler &io_handler, std::string & for (ThreadSP thread_sp : process_sp->GetThreadList().Threads()) { StopReason reason = thread_sp->GetStopReason(); - if (reason == eStopReasonSignal || reason == eStopReasonException || reason == eStopReasonInstrumentation) + if ((reason == eStopReasonSignal + || reason == eStopReasonException + || reason == eStopReasonInstrumentation) + && !result.GetAbnormalStopWasExpected()) { should_stop = true; break; diff --git 
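Note: the CommandObjectProcess.cpp and CommandInterpreter.cpp hunks above cooperate: "process attach" marks its stop as expected, and the batch-execution check now consults that flag. The resulting rule, restated as a small predicate (the helper name is an assumption):

    static bool
    ShouldHaltBatchExecution(lldb::StopReason reason,
                             const lldb_private::CommandReturnObject &result)
    {
        const bool abnormal = reason == lldb::eStopReasonSignal ||
                              reason == lldb::eStopReasonException ||
                              reason == lldb::eStopReasonInstrumentation;
        // An expected abnormal stop, such as the signal that accompanies
        // attaching, no longer halts batch command execution.
        return abnormal && !result.GetAbnormalStopWasExpected();
    }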
a/contrib/llvm/tools/lldb/source/Interpreter/CommandReturnObject.cpp b/contrib/llvm/tools/lldb/source/Interpreter/CommandReturnObject.cpp index 1b54187..b083c7f 100644 --- a/contrib/llvm/tools/lldb/source/Interpreter/CommandReturnObject.cpp +++ b/contrib/llvm/tools/lldb/source/Interpreter/CommandReturnObject.cpp @@ -47,7 +47,8 @@ CommandReturnObject::CommandReturnObject () : m_err_stream (), m_status (eReturnStatusStarted), m_did_change_process_state (false), - m_interactive (true) + m_interactive (true), + m_abnormal_stop_was_expected(false) { } diff --git a/contrib/llvm/tools/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp b/contrib/llvm/tools/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp index bc62c9f..e3da363 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp @@ -206,6 +206,7 @@ ABISysV_mips64::PrepareTrivialCall (Thread &thread, const RegisterInfo *pc_reg_info = reg_ctx->GetRegisterInfo (eRegisterKindGeneric, LLDB_REGNUM_GENERIC_PC); const RegisterInfo *sp_reg_info = reg_ctx->GetRegisterInfo (eRegisterKindGeneric, LLDB_REGNUM_GENERIC_SP); const RegisterInfo *ra_reg_info = reg_ctx->GetRegisterInfo (eRegisterKindGeneric, LLDB_REGNUM_GENERIC_RA); + const RegisterInfo *r25_info = reg_ctx->GetRegisterInfoByName("r25", 0); if (log) log->Printf("Writing SP: 0x%" PRIx64, (uint64_t)sp); @@ -228,6 +229,13 @@ ABISysV_mips64::PrepareTrivialCall (Thread &thread, if (!reg_ctx->WriteRegisterFromUnsigned (pc_reg_info, func_addr)) return false; + if (log) + log->Printf("Writing r25: 0x%" PRIx64, (uint64_t)func_addr); + + // All callers of position independent functions must place the address of the called function in t9 (r25) + if (!reg_ctx->WriteRegisterFromUnsigned (r25_info, func_addr)) + return false; + return true; } diff --git a/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp b/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp index 09e874a..443f97e 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp @@ -107,6 +107,7 @@ DYLDRendezvous::DYLDRendezvous(Process *process) m_rendezvous_addr(LLDB_INVALID_ADDRESS), m_current(), m_previous(), + m_loaded_modules(), m_soentries(), m_added_soentries(), m_removed_soentries() @@ -181,6 +182,9 @@ DYLDRendezvous::Resolve() m_previous = m_current; m_current = info; + if (UpdateSOEntries (true)) + return true; + return UpdateSOEntries(); } @@ -191,18 +195,23 @@ DYLDRendezvous::IsValid() } bool -DYLDRendezvous::UpdateSOEntries() +DYLDRendezvous::UpdateSOEntries(bool fromRemote) { SOEntry entry; + LoadedModuleInfoList module_list; - if (m_current.map_addr == 0) + // If we can't get the SO info from the remote, return failure. + if (fromRemote && m_process->LoadModules (module_list) == 0) + return false; + + if (!fromRemote && m_current.map_addr == 0) return false; // When the previous and current states are consistent this is the first // time we have been asked to update. Just take a snapshot of the currently // loaded modules. - if (m_previous.state == eConsistent && m_current.state == eConsistent) - return TakeSnapshot(m_soentries); + if (m_previous.state == eConsistent && m_current.state == eConsistent) + return fromRemote ? 
SaveSOEntriesFromRemote(module_list) : TakeSnapshot(m_soentries); // If we are about to add or remove a shared object clear out the current // state and take a snapshot of the currently loaded images. @@ -215,6 +224,9 @@ DYLDRendezvous::UpdateSOEntries() return false; m_soentries.clear(); + if (fromRemote) + return SaveSOEntriesFromRemote(module_list); + m_added_soentries.clear(); m_removed_soentries.clear(); return TakeSnapshot(m_soentries); @@ -224,15 +236,133 @@ DYLDRendezvous::UpdateSOEntries() // Otherwise check the previous state to determine what to expect and update // accordingly. if (m_previous.state == eAdd) - return UpdateSOEntriesForAddition(); + return fromRemote ? AddSOEntriesFromRemote(module_list) : AddSOEntries(); else if (m_previous.state == eDelete) - return UpdateSOEntriesForDeletion(); + return fromRemote ? RemoveSOEntriesFromRemote(module_list) : RemoveSOEntries(); return false; } - + bool -DYLDRendezvous::UpdateSOEntriesForAddition() +DYLDRendezvous::FillSOEntryFromModuleInfo (LoadedModuleInfoList::LoadedModuleInfo const & modInfo, + SOEntry &entry) +{ + addr_t link_map_addr; + addr_t base_addr; + addr_t dyn_addr; + std::string name; + + if (!modInfo.get_link_map (link_map_addr) || + !modInfo.get_base (base_addr) || + !modInfo.get_dynamic (dyn_addr) || + !modInfo.get_name (name)) + return false; + + entry.link_addr = link_map_addr; + entry.base_addr = base_addr; + entry.dyn_addr = dyn_addr; + + entry.file_spec.SetFile(name, false); + + UpdateBaseAddrIfNecessary(entry, name); + + // not needed if we're using ModuleInfos + entry.next = 0; + entry.prev = 0; + entry.path_addr = 0; + + return true; +} + +bool +DYLDRendezvous::SaveSOEntriesFromRemote(LoadedModuleInfoList &module_list) +{ + for (auto const & modInfo : module_list.m_list) + { + SOEntry entry; + if (!FillSOEntryFromModuleInfo(modInfo, entry)) + return false; + + // Only add shared libraries and not the executable. + if (!SOEntryIsMainExecutable(entry)) + m_soentries.push_back(entry); + } + + m_loaded_modules = module_list; + return true; + +} + +bool +DYLDRendezvous::AddSOEntriesFromRemote(LoadedModuleInfoList &module_list) +{ + for (auto const & modInfo : module_list.m_list) + { + bool found = false; + for (auto const & existing : m_loaded_modules.m_list) + { + if (modInfo == existing) + { + found = true; + break; + } + } + + if (found) + continue; + + SOEntry entry; + if (!FillSOEntryFromModuleInfo(modInfo, entry)) + return false; + + // Only add shared libraries and not the executable. + if (!SOEntryIsMainExecutable(entry)) + m_soentries.push_back(entry); + } + + m_loaded_modules = module_list; + return true; +} + +bool +DYLDRendezvous::RemoveSOEntriesFromRemote(LoadedModuleInfoList &module_list) +{ + for (auto const & existing : m_loaded_modules.m_list) + { + bool found = false; + for (auto const & modInfo : module_list.m_list) + { + if (modInfo == existing) + { + found = true; + break; + } + } + + if (found) + continue; + + SOEntry entry; + if (!FillSOEntryFromModuleInfo(existing, entry)) + return false; + + // Only add shared libraries and not the executable. 
+ if (!SOEntryIsMainExecutable(entry)) + { + auto pos = std::find(m_soentries.begin(), m_soentries.end(), entry); + if (pos == m_soentries.end()) + return false; + + m_soentries.erase(pos); + } + } + + m_loaded_modules = module_list; + return true; +} + +bool +DYLDRendezvous::AddSOEntries() { SOEntry entry; iterator pos; @@ -263,7 +393,7 @@ DYLDRendezvous::UpdateSOEntriesForAddition() } bool -DYLDRendezvous::UpdateSOEntriesForDeletion() +DYLDRendezvous::RemoveSOEntries() { SOEntryList entry_list; iterator pos; @@ -291,7 +421,8 @@ DYLDRendezvous::SOEntryIsMainExecutable(const SOEntry &entry) // FreeBSD and on Android it is the full path to the executable. auto triple = m_process->GetTarget().GetArchitecture().GetTriple(); - switch (triple.getOS()) { + switch (triple.getOS()) + { case llvm::Triple::FreeBSD: return entry.file_spec == m_exe_file_spec; case llvm::Triple::Linux: @@ -386,6 +517,21 @@ isLoadBiasIncorrect(Target& target, const std::string& file_path) return false; } +void +DYLDRendezvous::UpdateBaseAddrIfNecessary(SOEntry &entry, std::string const &file_path) +{ + // If the load bias reported by the linker is incorrect then fetch the load address of the file + // from the proc file system. + if (isLoadBiasIncorrect(m_process->GetTarget(), file_path)) + { + lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; + bool is_loaded = false; + Error error = m_process->GetFileLoadAddress(entry.file_spec, is_loaded, load_addr); + if (error.Success() && is_loaded) + entry.base_addr = load_addr; + } +} + bool DYLDRendezvous::ReadSOEntryFromMemory(lldb::addr_t addr, SOEntry &entry) { @@ -427,16 +573,7 @@ DYLDRendezvous::ReadSOEntryFromMemory(lldb::addr_t addr, SOEntry &entry) std::string file_path = ReadStringFromMemory(entry.path_addr); entry.file_spec.SetFile(file_path, false); - // If the load bias reported by the linker is incorrect then fetch the load address of the file - // from the proc file system. - if (isLoadBiasIncorrect(m_process->GetTarget(), file_path)) - { - lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; - bool is_loaded = false; - Error error = m_process->GetFileLoadAddress(entry.file_spec, is_loaded, load_addr); - if (error.Success() && is_loaded) - entry.base_addr = load_addr; - } + UpdateBaseAddrIfNecessary(entry, file_path); return true; } diff --git a/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h b/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h index ec5af94..8498116 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h +++ b/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h @@ -20,6 +20,10 @@ #include "lldb/lldb-types.h" #include "lldb/Host/FileSpec.h" +#include "lldb/Core/LoadedModuleInfoList.h" + +using lldb_private::LoadedModuleInfoList; + namespace lldb_private { class Process; } @@ -201,6 +205,9 @@ protected: Rendezvous m_current; Rendezvous m_previous; + /// List of currently loaded SO modules + LoadedModuleInfoList m_loaded_modules; + /// List of SOEntry objects corresponding to the current link map state. SOEntryList m_soentries; @@ -240,13 +247,29 @@ protected: /// Updates the current set of SOEntries, the set of added entries, and the /// set of removed entries. 
bool - UpdateSOEntries(); + UpdateSOEntries(bool fromRemote = false); + + bool + FillSOEntryFromModuleInfo (LoadedModuleInfoList::LoadedModuleInfo const & modInfo, + SOEntry &entry); + + bool + SaveSOEntriesFromRemote(LoadedModuleInfoList &module_list); bool - UpdateSOEntriesForAddition(); + AddSOEntriesFromRemote(LoadedModuleInfoList &module_list); bool - UpdateSOEntriesForDeletion(); + RemoveSOEntriesFromRemote(LoadedModuleInfoList &module_list); + + bool + AddSOEntries(); + + bool + RemoveSOEntries(); + + void + UpdateBaseAddrIfNecessary(SOEntry &entry, std::string const &file_path); bool SOEntryIsMainExecutable(const SOEntry &entry); diff --git a/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index c9bc4b6..f4d6b19 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -36,6 +36,7 @@ #include "lldb/Symbol/Function.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Symbol/SymbolContext.h" +#include "lldb/Symbol/SymbolFile.h" #include "lldb/Symbol/SymbolVendor.h" #include "lldb/Symbol/Type.h" #include "lldb/Symbol/TypeList.h" @@ -570,6 +571,63 @@ FindCodeSymbolInContext } } +ConstString +FindBestAlternateMangledName +( + const ConstString &demangled, + const LanguageType &lang_type, + SymbolContext &sym_ctx +) +{ + CPlusPlusLanguage::MethodName cpp_name(demangled); + std::string scope_qualified_name = cpp_name.GetScopeQualifiedName(); + + if (!scope_qualified_name.size()) + return ConstString(); + + if (!sym_ctx.module_sp) + return ConstString(); + + SymbolVendor *sym_vendor = sym_ctx.module_sp->GetSymbolVendor(); + if (!sym_vendor) + return ConstString(); + + lldb_private::SymbolFile *sym_file = sym_vendor->GetSymbolFile(); + if (!sym_file) + return ConstString(); + + std::vector<ConstString> alternates; + sym_file->GetMangledNamesForFunction(scope_qualified_name, alternates); + + std::vector<ConstString> param_and_qual_matches; + std::vector<ConstString> param_matches; + for (size_t i = 0; i < alternates.size(); i++) + { + ConstString alternate_mangled_name = alternates[i]; + Mangled mangled(alternate_mangled_name, true); + ConstString demangled = mangled.GetDemangledName(lang_type); + + CPlusPlusLanguage::MethodName alternate_cpp_name(demangled); + if (!cpp_name.IsValid()) + continue; + + if (alternate_cpp_name.GetArguments() == cpp_name.GetArguments()) + { + if (alternate_cpp_name.GetQualifiers() == cpp_name.GetQualifiers()) + param_and_qual_matches.push_back(alternate_mangled_name); + else + param_matches.push_back(alternate_mangled_name); + } + } + + if (param_and_qual_matches.size()) + return param_and_qual_matches[0]; // It is assumed that there will be only one! + else if (param_matches.size()) + return param_matches[0]; // Return one of them as a best match + else + return ConstString(); +} + bool ClangExpressionDeclMap::GetFunctionAddress ( @@ -603,15 +661,25 @@ ClangExpressionDeclMap::GetFunctionAddress if (Language::LanguageIsCPlusPlus(lang_type) && CPlusPlusLanguage::IsCPPMangledName(name.AsCString())) { - // 1. 
Demangle the name Mangled mangled(name, true); ConstString demangled = mangled.GetDemangledName(lang_type); if (demangled) { - FindCodeSymbolInContext( - demangled, m_parser_vars->m_sym_ctx, eFunctionNameTypeFull, sc_list); - sc_list_size = sc_list.GetSize(); + ConstString best_alternate_mangled_name = FindBestAlternateMangledName(demangled, lang_type, sc); + if (best_alternate_mangled_name) + { + FindCodeSymbolInContext( + best_alternate_mangled_name, m_parser_vars->m_sym_ctx, eFunctionNameTypeAuto, sc_list); + sc_list_size = sc_list.GetSize(); + } + + if (sc_list_size == 0) + { + FindCodeSymbolInContext( + demangled, m_parser_vars->m_sym_ctx, eFunctionNameTypeFull, sc_list); + sc_list_size = sc_list.GetSize(); + } } } } diff --git a/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp b/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp index 37b7bd1d..509c594 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp @@ -36,7 +36,6 @@ #include "lldb/Host/Endian.h" #include "lldb/Symbol/ClangASTContext.h" #include "lldb/Symbol/CompilerType.h" -#include "lldb/Target/CPPLanguageRuntime.h" #include <map> @@ -230,36 +229,6 @@ IRForTarget::GetFunctionAddress (llvm::Function *fun, { std::vector<lldb_private::ConstString> alternates; bool found_it = m_decl_map->GetFunctionAddress (name, fun_addr); - if (!found_it) - { - if (log) - log->Printf("Address of function \"%s\" not found.\n", name.GetCString()); - // Check for an alternate mangling for names from the standard library. - // For example, "std::basic_string<...>" has an alternate mangling scheme per - // the Itanium C++ ABI. - lldb::ProcessSP process_sp = m_data_allocator.GetTarget()->GetProcessSP(); - if (process_sp) - { - lldb_private::CPPLanguageRuntime *cpp_runtime = process_sp->GetCPPLanguageRuntime(); - if (cpp_runtime && cpp_runtime->GetAlternateManglings(name, alternates)) - { - for (size_t i = 0; i < alternates.size(); ++i) - { - const lldb_private::ConstString &alternate_name = alternates[i]; - if (log) - log->Printf("Looking up address of function \"%s\" with alternate name \"%s\"", - name.GetCString(), alternate_name.GetCString()); - if ((found_it = m_decl_map->GetFunctionAddress (alternate_name, fun_addr))) - { - if (log) - log->Printf("Found address of function \"%s\" with alternate name \"%s\"", - name.GetCString(), alternate_name.GetCString()); - break; - } - } - } - } - } if (!found_it) { diff --git a/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp b/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp index 28eba09..9c09d38 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp @@ -549,45 +549,45 @@ EmulateInstructionMIPS64::GetOpcodeForInstruction (const char *op_name) //---------------------------------------------------------------------- // Branch instructions //---------------------------------------------------------------------- - { "BEQ", &EmulateInstructionMIPS64::Emulate_BEQ, "BEQ rs,rt,offset" }, - { "BNE", &EmulateInstructionMIPS64::Emulate_BNE, "BNE rs,rt,offset" }, - { "BEQL", &EmulateInstructionMIPS64::Emulate_BEQL, "BEQL rs,rt,offset" }, - { "BNEL", &EmulateInstructionMIPS64::Emulate_BNEL, "BNEL rs,rt,offset" }, - { "BGEZALL", 
&EmulateInstructionMIPS64::Emulate_BGEZALL, "BGEZALL rt,offset" }, + { "BEQ", &EmulateInstructionMIPS64::Emulate_BXX_3ops, "BEQ rs,rt,offset" }, + { "BNE", &EmulateInstructionMIPS64::Emulate_BXX_3ops, "BNE rs,rt,offset" }, + { "BEQL", &EmulateInstructionMIPS64::Emulate_BXX_3ops, "BEQL rs,rt,offset" }, + { "BNEL", &EmulateInstructionMIPS64::Emulate_BXX_3ops, "BNEL rs,rt,offset" }, + { "BGEZALL", &EmulateInstructionMIPS64::Emulate_Bcond_Link, "BGEZALL rt,offset" }, { "BAL", &EmulateInstructionMIPS64::Emulate_BAL, "BAL offset" }, - { "BGEZAL", &EmulateInstructionMIPS64::Emulate_BGEZAL, "BGEZAL rs,offset" }, + { "BGEZAL", &EmulateInstructionMIPS64::Emulate_Bcond_Link, "BGEZAL rs,offset" }, { "BALC", &EmulateInstructionMIPS64::Emulate_BALC, "BALC offset" }, { "BC", &EmulateInstructionMIPS64::Emulate_BC, "BC offset" }, - { "BGEZ", &EmulateInstructionMIPS64::Emulate_BGEZ, "BGEZ rs,offset" }, - { "BLEZALC", &EmulateInstructionMIPS64::Emulate_BLEZALC, "BLEZALC rs,offset" }, - { "BGEZALC", &EmulateInstructionMIPS64::Emulate_BGEZALC, "BGEZALC rs,offset" }, - { "BLTZALC", &EmulateInstructionMIPS64::Emulate_BLTZALC, "BLTZALC rs,offset" }, - { "BGTZALC", &EmulateInstructionMIPS64::Emulate_BGTZALC, "BGTZALC rs,offset" }, - { "BEQZALC", &EmulateInstructionMIPS64::Emulate_BEQZALC, "BEQZALC rs,offset" }, - { "BNEZALC", &EmulateInstructionMIPS64::Emulate_BNEZALC, "BNEZALC rs,offset" }, - { "BEQC", &EmulateInstructionMIPS64::Emulate_BEQC, "BEQC rs,rt,offset" }, - { "BNEC", &EmulateInstructionMIPS64::Emulate_BNEC, "BNEC rs,rt,offset" }, - { "BLTC", &EmulateInstructionMIPS64::Emulate_BLTC, "BLTC rs,rt,offset" }, - { "BGEC", &EmulateInstructionMIPS64::Emulate_BGEC, "BGEC rs,rt,offset" }, - { "BLTUC", &EmulateInstructionMIPS64::Emulate_BLTUC, "BLTUC rs,rt,offset" }, - { "BGEUC", &EmulateInstructionMIPS64::Emulate_BGEUC, "BGEUC rs,rt,offset" }, - { "BLTZC", &EmulateInstructionMIPS64::Emulate_BLTZC, "BLTZC rt,offset" }, - { "BLEZC", &EmulateInstructionMIPS64::Emulate_BLEZC, "BLEZC rt,offset" }, - { "BGEZC", &EmulateInstructionMIPS64::Emulate_BGEZC, "BGEZC rt,offset" }, - { "BGTZC", &EmulateInstructionMIPS64::Emulate_BGTZC, "BGTZC rt,offset" }, - { "BEQZC", &EmulateInstructionMIPS64::Emulate_BEQZC, "BEQZC rt,offset" }, - { "BNEZC", &EmulateInstructionMIPS64::Emulate_BNEZC, "BNEZC rt,offset" }, - { "BGEZL", &EmulateInstructionMIPS64::Emulate_BGEZL, "BGEZL rt,offset" }, - { "BGTZ", &EmulateInstructionMIPS64::Emulate_BGTZ, "BGTZ rt,offset" }, - { "BGTZL", &EmulateInstructionMIPS64::Emulate_BGTZL, "BGTZL rt,offset" }, - { "BLEZ", &EmulateInstructionMIPS64::Emulate_BLEZ, "BLEZ rt,offset" }, - { "BLEZL", &EmulateInstructionMIPS64::Emulate_BLEZL, "BLEZL rt,offset" }, - { "BLTZ", &EmulateInstructionMIPS64::Emulate_BLTZ, "BLTZ rt,offset" }, - { "BLTZAL", &EmulateInstructionMIPS64::Emulate_BLTZAL, "BLTZAL rt,offset" }, - { "BLTZALL", &EmulateInstructionMIPS64::Emulate_BLTZALL, "BLTZALL rt,offset" }, - { "BLTZL", &EmulateInstructionMIPS64::Emulate_BLTZL, "BLTZL rt,offset" }, - { "BOVC", &EmulateInstructionMIPS64::Emulate_BOVC, "BOVC rs,rt,offset" }, - { "BNVC", &EmulateInstructionMIPS64::Emulate_BNVC, "BNVC rs,rt,offset" }, + { "BGEZ", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BGEZ rs,offset" }, + { "BLEZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BLEZALC rs,offset" }, + { "BGEZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BGEZALC rs,offset" }, + { "BLTZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BLTZALC rs,offset" }, + { "BGTZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BGTZALC 
rs,offset" }, + { "BEQZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BEQZALC rs,offset" }, + { "BNEZALC", &EmulateInstructionMIPS64::Emulate_Bcond_Link_C,"BNEZALC rs,offset" }, + { "BEQC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BEQC rs,rt,offset" }, + { "BNEC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BNEC rs,rt,offset" }, + { "BLTC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BLTC rs,rt,offset" }, + { "BGEC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BGEC rs,rt,offset" }, + { "BLTUC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BLTUC rs,rt,offset" }, + { "BGEUC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BGEUC rs,rt,offset" }, + { "BLTZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BLTZC rt,offset" }, + { "BLEZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BLEZC rt,offset" }, + { "BGEZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BGEZC rt,offset" }, + { "BGTZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BGTZC rt,offset" }, + { "BEQZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BEQZC rt,offset" }, + { "BNEZC", &EmulateInstructionMIPS64::Emulate_BXX_2ops_C, "BNEZC rt,offset" }, + { "BGEZL", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BGEZL rt,offset" }, + { "BGTZ", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BGTZ rt,offset" }, + { "BGTZL", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BGTZL rt,offset" }, + { "BLEZ", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BLEZ rt,offset" }, + { "BLEZL", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BLEZL rt,offset" }, + { "BLTZ", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BLTZ rt,offset" }, + { "BLTZAL", &EmulateInstructionMIPS64::Emulate_Bcond_Link, "BLTZAL rt,offset" }, + { "BLTZALL", &EmulateInstructionMIPS64::Emulate_Bcond_Link, "BLTZALL rt,offset" }, + { "BLTZL", &EmulateInstructionMIPS64::Emulate_BXX_2ops, "BLTZL rt,offset" }, + { "BOVC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BOVC rs,rt,offset" }, + { "BNVC", &EmulateInstructionMIPS64::Emulate_BXX_3ops_C, "BNVC rs,rt,offset" }, { "J", &EmulateInstructionMIPS64::Emulate_J, "J target" }, { "JAL", &EmulateInstructionMIPS64::Emulate_JAL, "JAL target" }, { "JALX", &EmulateInstructionMIPS64::Emulate_JAL, "JALX target" }, @@ -597,16 +597,16 @@ EmulateInstructionMIPS64::GetOpcodeForInstruction (const char *op_name) { "JIC", &EmulateInstructionMIPS64::Emulate_JIC, "JIC rt,offset" }, { "JR", &EmulateInstructionMIPS64::Emulate_JR, "JR target" }, { "JR_HB", &EmulateInstructionMIPS64::Emulate_JR, "JR.HB target" }, - { "BC1F", &EmulateInstructionMIPS64::Emulate_BC1F, "BC1F cc, offset" }, - { "BC1T", &EmulateInstructionMIPS64::Emulate_BC1T, "BC1T cc, offset" }, - { "BC1FL", &EmulateInstructionMIPS64::Emulate_BC1FL, "BC1FL cc, offset" }, - { "BC1TL", &EmulateInstructionMIPS64::Emulate_BC1TL, "BC1TL cc, offset" }, + { "BC1F", &EmulateInstructionMIPS64::Emulate_FP_branch, "BC1F cc, offset" }, + { "BC1T", &EmulateInstructionMIPS64::Emulate_FP_branch, "BC1T cc, offset" }, + { "BC1FL", &EmulateInstructionMIPS64::Emulate_FP_branch, "BC1FL cc, offset" }, + { "BC1TL", &EmulateInstructionMIPS64::Emulate_FP_branch, "BC1TL cc, offset" }, { "BC1EQZ", &EmulateInstructionMIPS64::Emulate_BC1EQZ, "BC1EQZ ft, offset" }, { "BC1NEZ", &EmulateInstructionMIPS64::Emulate_BC1NEZ, "BC1NEZ ft, offset" }, - { "BC1ANY2F", &EmulateInstructionMIPS64::Emulate_BC1ANY2F, "BC1ANY2F cc, offset" }, - { "BC1ANY2T", &EmulateInstructionMIPS64::Emulate_BC1ANY2T, "BC1ANY2T cc, offset" }, - { "BC1ANY4F", &EmulateInstructionMIPS64::Emulate_BC1ANY4F, "BC1ANY4F cc, 
offset" }, - { "BC1ANY4T", &EmulateInstructionMIPS64::Emulate_BC1ANY4T, "BC1ANY4T cc, offset" }, + { "BC1ANY2F", &EmulateInstructionMIPS64::Emulate_3D_branch, "BC1ANY2F cc, offset" }, + { "BC1ANY2T", &EmulateInstructionMIPS64::Emulate_3D_branch, "BC1ANY2T cc, offset" }, + { "BC1ANY4F", &EmulateInstructionMIPS64::Emulate_3D_branch, "BC1ANY4F cc, offset" }, + { "BC1ANY4T", &EmulateInstructionMIPS64::Emulate_3D_branch, "BC1ANY4T cc, offset" }, { "BNZ_B", &EmulateInstructionMIPS64::Emulate_BNZB, "BNZ.b wt,s16" }, { "BNZ_H", &EmulateInstructionMIPS64::Emulate_BNZH, "BNZ.h wt,s16" }, { "BNZ_W", &EmulateInstructionMIPS64::Emulate_BNZW, "BNZ.w wt,s16" }, @@ -907,107 +907,20 @@ EmulateInstructionMIPS64::Emulate_LD (llvm::MCInst& insn) return false; } -bool -EmulateInstructionMIPS64::Emulate_BEQ (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; - - /* - * BEQ rs, rt, offset - * condition <- (GPR[rs] = GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - * else - * PC = PC + 4 - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val == rt_val) - target = pc + offset; - else - target = pc + 8; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BNE (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; - - /* - * BNE rs, rt, offset - * condition <- (GPR[rs] != GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val != rt_val) - target = pc + offset; - else - target = pc + 8; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} +/* + Emulate below MIPS branch instructions. 
+ BEQ, BNE : Branch on condition + BEQL, BNEL : Branch likely +*/ bool -EmulateInstructionMIPS64::Emulate_BEQL (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_BXX_3ops (llvm::MCInst& insn) { bool success = false; uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; + int64_t offset, pc, rs_val, rt_val, target = 0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - /* - * BEQL rs, rt, offset - * condition <- (GPR[rs] = GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); offset = insn.getOperand(2).getImm(); @@ -1024,290 +937,26 @@ EmulateInstructionMIPS64::Emulate_BEQL (llvm::MCInst& insn) if (!success) return false; - if (rs_val == rt_val) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BNEL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; - - /* - * BNEL rs, rt, offset - * condition <- (GPR[rs] != GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val != rt_val) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEZL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGEZL rs, offset - * condition <- (GPR[rs] >= 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val >= 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTZL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLTZL rs, offset - * condition <- (GPR[rs] < 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = 
insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val < 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGTZL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGTZL rs, offset - * condition <- (GPR[rs] > 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val > 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLEZL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLEZL rs, offset - * condition <- (GPR[rs] <= 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val <= 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGTZ (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGTZ rs, offset - * condition <- (GPR[rs] > 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val > 0) - target = pc + offset; - else - target = pc + 8; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLEZ (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLEZ rs, offset - * condition <- (GPR[rs] <= 0) - * if condition 
then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val <= 0) - target = pc + offset; - else - target = pc + 8; + if (!strcasecmp (op_name, "BEQ") || + !strcasecmp (op_name, "BEQL")) + { + if (rs_val == rt_val) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BNE") || + !strcasecmp (op_name, "BNEL")) + { + if (rs_val != rt_val) + target = pc + offset; + else + target = pc + 8; + } Context context; context.type = eContextRelativeBranchImmediate; + context.SetImmediate (offset); if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -1315,20 +964,20 @@ EmulateInstructionMIPS64::Emulate_BLEZ (llvm::MCInst& insn) return true; } +/* + Emulate below MIPS Non-Compact conditional branch and link instructions. + BLTZAL, BGEZAL : + BLTZALL, BGEZALL : Branch likely +*/ bool -EmulateInstructionMIPS64::Emulate_BLTZ (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_Bcond_Link (llvm::MCInst& insn) { bool success = false; uint32_t rs; - int64_t offset, pc, target; + int64_t offset, pc, target = 0; int64_t rs_val; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - /* - * BLTZ rs, offset - * condition <- (GPR[rs] < 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); offset = insn.getOperand(1).getImm(); @@ -1340,52 +989,24 @@ EmulateInstructionMIPS64::Emulate_BLTZ (llvm::MCInst& insn) if (!success) return false; - if (rs_val < 0) - target = pc + offset; - else - target = pc + 8; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEZALL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGEZALL rt, offset - * condition <- (GPR[rs] >= 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val >= 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ + if (!strcasecmp (op_name, "BLTZAL") || + !strcasecmp (op_name, "BLTZALL")) + { + if (rs_val < 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BGEZAL") || + !strcasecmp (op_name, "BGEZALL")) + { + if (rs_val >= 0) + target = pc + offset; + else + target = pc + 8; + } Context context; - context.type = eContextRelativeBranchImmediate; if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -1458,65 +1079,18 @@ EmulateInstructionMIPS64::Emulate_BALC (llvm::MCInst& insn) return true; } +/* + Emulate below MIPS conditional branch and link instructions. 
+ BLEZALC, BGEZALC, BLTZALC, BGTZALC, BEQZALC, BNEZALC : Compact branches +*/ bool -EmulateInstructionMIPS64::Emulate_BGEZAL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGEZAL rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] >= 0) - * if condition then - * RA = PC + 8 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if ((int64_t) rs_val >= 0) - target = pc + offset; - else - target = pc + 8; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 8)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTZAL (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_Bcond_Link_C (llvm::MCInst& insn) { bool success = false; uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; + int64_t offset, pc, rs_val, target = 0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - /* - * BLTZAL rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] < 0) - * if condition then - * RA = PC + 8 - * PC = PC + offset - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); offset = insn.getOperand(1).getImm(); @@ -1528,271 +1102,51 @@ EmulateInstructionMIPS64::Emulate_BLTZAL (llvm::MCInst& insn) if (!success) return false; - Context context; - - if ((int64_t) rs_val < 0) - target = pc + offset; - else - target = pc + 8; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 8)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTZALL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLTZALL rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] < 0) - * if condition then - * RA = PC + 8 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if (rs_val < 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 8)) - return false; - - return true; -} - - -bool -EmulateInstructionMIPS64::Emulate_BLEZALC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLEZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] <= 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = 
insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if (rs_val <= 0) - target = pc + offset; - else - target = pc + 4; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 4)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEZALC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGEZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] >= 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if (rs_val >= 0) - target = pc + offset; - else - target = pc + 4; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 4)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTZALC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLTZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] < 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if (rs_val < 0) - target = pc + offset; - else - target = pc + 4; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 4)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGTZALC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGTZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] > 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - Context context; - - if (rs_val > 0) - target = pc + offset; - else - target = pc + 4; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 4)) - 
return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BEQZALC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target, rs_val; - - /* - * BEQZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] == 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; + if (!strcasecmp (op_name, "BLEZALC")) + { + if (rs_val <= 0) + target = pc + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGEZALC")) + { + if (rs_val >= 0) + target = pc + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BLTZALC")) + { + if (rs_val < 0) + target = pc + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGTZALC")) + { + if (rs_val > 0) + target = pc + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BEQZALC")) + { + if (rs_val == 0) + target = pc + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BNEZALC")) + { + if (rs_val != 0) + target = pc + offset; + else + target = pc + 4; + } Context context; - if (rs_val == 0) - target = pc + offset; - else - target = pc + 4; - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -1802,21 +1156,19 @@ EmulateInstructionMIPS64::Emulate_BEQZALC (llvm::MCInst& insn) return true; } +/* + Emulate below MIPS branch instructions. + BLTZL, BGEZL, BGTZL, BLEZL : Branch likely + BLTZ, BGEZ, BGTZ, BLEZ : Non-compact branches +*/ bool -EmulateInstructionMIPS64::Emulate_BNEZALC (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_BXX_2ops (llvm::MCInst& insn) { bool success = false; uint32_t rs; - int64_t offset, pc, target, rs_val; + int64_t offset, pc, rs_val, target = 0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - /* - * BNEZALC rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] != 0) - * if condition then - * RA = PC + 4 - * PC = PC + offset - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); offset = insn.getOperand(1).getImm(); @@ -1828,53 +1180,42 @@ EmulateInstructionMIPS64::Emulate_BNEZALC (llvm::MCInst& insn) if (!success) return false; - Context context; - - if (rs_val != 0) - target = pc + offset; - else - target = pc + 4; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_ra_mips64, pc + 4)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEZ (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target, rs_val; - - /* - * BGEZ rs,offset - * offset = sign_ext (offset << 2) - * condition <- (GPR[rs] >= 0) - * if condition then - * PC = PC + offset - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; + if (!strcasecmp (op_name, "BLTZL") || 
+ !strcasecmp (op_name, "BLTZ")) + { + if (rs_val < 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BGEZL") || + !strcasecmp (op_name, "BGEZ")) + { + if (rs_val >= 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BGTZL") || + !strcasecmp (op_name, "BGTZ")) + { + if (rs_val > 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BLEZL") || + !strcasecmp (op_name, "BLEZ")) + { + if (rs_val <= 0) + target = pc + offset; + else + target = pc + 8; + } Context context; - - if (rs_val >= 0) - target = pc + offset; - else - target = pc + 8; + context.type = eContextRelativeBranchImmediate; + context.SetImmediate (offset); if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -1909,502 +1250,6 @@ EmulateInstructionMIPS64::Emulate_BC (llvm::MCInst& insn) return true; } -bool -EmulateInstructionMIPS64::Emulate_BEQC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; - - /* - * BEQC rs, rt, offset - * condition <- (GPR[rs] = GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val == rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BNEC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target, rs_val, rt_val; - - /* - * BNEC rs, rt, offset - * condition <- (GPR[rs] != GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val != rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target; - int64_t rs_val, rt_val; - - /* - * BLTC rs, rt, offset - * condition <- (GPR[rs] < GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue 
(insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val < rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target; - int64_t rs_val, rt_val; - - /* - * BGEC rs, rt, offset - * condition <- (GPR[rs] > GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val > rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTUC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target; - uint64_t rs_val, rt_val; - - /* - * BLTUC rs, rt, offset - * condition <- (GPR[rs] < GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val < rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEUC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target; - uint64_t rs_val, rt_val; - - /* - * BGEUC rs, rt, offset - * condition <- (GPR[rs] > GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); - - pc = 
ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - rt_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (rs_val > rt_val) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLTZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLTZC rs, offset - * condition <- (GPR[rs] < 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val < 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BLEZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BLEZC rs, offset - * condition <- (GPR[rs] <= 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val <= 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGEZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGEZC rs, offset - * condition <- (GPR[rs] >= 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val >= 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BGTZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - int64_t rs_val; - - /* - * BGTZC rs, offset - * condition 
<- (GPR[rs] > 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val > 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BEQZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - uint64_t rs_val; - - /* - * BEQZC rs, offset - * condition <- (GPR[rs] = 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val == 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BNEZC (llvm::MCInst& insn) -{ - bool success = false; - uint32_t rs; - int64_t offset, pc, target; - uint64_t rs_val; - - /* - * BNEZC rs, offset - * condition <- (GPR[rs] != 0) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ - rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - rs_val = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rs, 0, &success); - if (!success) - return false; - - if (rs_val != 0) - target = pc + 4 + offset; - else - target = pc + 4; - - Context context; - context.type = eContextRelativeBranchImmediate; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - static int IsAdd64bitOverflow (int64_t a, int64_t b) { @@ -2412,20 +1257,19 @@ IsAdd64bitOverflow (int64_t a, int64_t b) return (a < 0 && b < 0 && r >= 0) || (a >= 0 && b >= 0 && r < 0); } +/* + Emulate below MIPS branch instructions. 
+ BEQC, BNEC, BLTC, BGEC, BLTUC, BGEUC, BOVC, BNVC: Compact branch instructions with no delay slot +*/ bool -EmulateInstructionMIPS64::Emulate_BOVC (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_BXX_3ops_C (llvm::MCInst& insn) { bool success = false; uint32_t rs, rt; - int64_t offset, pc, target; - int64_t rs_val, rt_val; + int64_t offset, pc, rs_val, rt_val, target = 0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); + uint32_t current_inst_size = m_insn_info->get(insn.getOpcode()).getSize(); - /* - * BOVC rs, rt, offset - * condition <- overflow(GPR[rs] + GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); offset = insn.getOperand(2).getImm(); @@ -2442,13 +1286,66 @@ EmulateInstructionMIPS64::Emulate_BOVC (llvm::MCInst& insn) if (!success) return false; - if (IsAdd64bitOverflow (rs_val, rt_val)) - target = pc + offset; - else - target = pc + 4; + if (!strcasecmp (op_name, "BEQC")) + { + if (rs_val == rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BNEC")) + { + if (rs_val != rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BLTC")) + { + if (rs_val < rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGEC")) + { + if (rs_val >= rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BLTUC")) + { + if ((uint64_t)rs_val < (uint64_t)rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGEUC")) + { + if ((uint64_t)rs_val >= (uint64_t)rt_val) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BOVC")) + { + if (IsAdd64bitOverflow (rs_val, rt_val)) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BNVC")) + { + if (!IsAdd64bitOverflow (rs_val, rt_val)) + target = pc + 4 + offset; + else + target = pc + 4; + } Context context; context.type = eContextRelativeBranchImmediate; + context.SetImmediate (current_inst_size + offset); if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -2456,23 +1353,22 @@ EmulateInstructionMIPS64::Emulate_BOVC (llvm::MCInst& insn) return true; } +/* + Emulate below MIPS branch instructions. 
+ BLTZC, BLEZC, BGEZC, BGTZC, BEQZC, BNEZC : Compact Branches +*/ bool -EmulateInstructionMIPS64::Emulate_BNVC (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_BXX_2ops_C (llvm::MCInst& insn) { bool success = false; - uint32_t rs, rt; - int64_t offset, pc, target; - int64_t rs_val, rt_val; + uint32_t rs; + int64_t offset, pc, target = 0; + int64_t rs_val; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); + uint32_t current_inst_size = m_insn_info->get(insn.getOpcode()).getSize(); - /* - * BNVC rs, rt, offset - * condition <- overflow(GPR[rs] + GPR[rt]) - * if condition then - * PC = PC + sign_ext (offset << 2) - */ rs = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - rt = m_reg_info->getEncodingValue (insn.getOperand(1).getReg()); - offset = insn.getOperand(2).getImm(); + offset = insn.getOperand(1).getImm(); pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); if (!success) @@ -2482,17 +1378,52 @@ EmulateInstructionMIPS64::Emulate_BNVC (llvm::MCInst& insn) if (!success) return false; - rt_val = (int64_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_zero_mips64 + rt, 0, &success); - if (!success) - return false; - - if (! IsAdd64bitOverflow (rs_val, rt_val)) - target = pc + offset; - else - target = pc + 4; + if (!strcasecmp (op_name, "BLTZC")) + { + if (rs_val < 0) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BLEZC")) + { + if (rs_val <= 0) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGEZC")) + { + if (rs_val >= 0) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BGTZC")) + { + if (rs_val > 0) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BEQZC")) + { + if (rs_val == 0) + target = pc + 4 + offset; + else + target = pc + 4; + } + else if (!strcasecmp (op_name, "BNEZC")) + { + if (rs_val != 0) + target = pc + 4 + offset; + else + target = pc + 4; + } Context context; context.type = eContextRelativeBranchImmediate; + context.SetImmediate (current_inst_size + offset); if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) return false; @@ -2684,95 +1615,19 @@ EmulateInstructionMIPS64::Emulate_JR (llvm::MCInst& insn) return true; } +/* + Emulate Branch on FP True/False + BC1F, BC1FL : Branch on FP False (L stands for branch likely) + BC1T, BC1TL : Branch on FP True (L stands for branch likely) +*/ bool -EmulateInstructionMIPS64::Emulate_BC1F (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_FP_branch (llvm::MCInst& insn) { bool success = false; uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1F cc, offset - * condition <- (FPConditionCode(cc) == 0) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - - if ((fcsr & (1 << cc)) == 0) - target = pc + offset; - else - target = pc + 8; - - Context context; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; + int64_t pc, offset, target = 
0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1T (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1T cc, offset - * condition <- (FPConditionCode(cc) != 0) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - - if ((fcsr & (1 << cc)) != 0) - target = pc + offset; - else - target = pc + 8; - - Context context; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1FL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - /* * BC1F cc, offset * condition <- (FPConditionCode(cc) == 0) @@ -2782,48 +1637,7 @@ EmulateInstructionMIPS64::Emulate_BC1FL (llvm::MCInst& insn) */ cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - - if ((fcsr & (1 << cc)) == 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - - Context context; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1TL (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1T cc, offset - * condition <- (FPConditionCode(cc) != 0) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); if (!success) return false; @@ -2835,11 +1649,23 @@ EmulateInstructionMIPS64::Emulate_BC1TL (llvm::MCInst& insn) /* fcsr[23], fcsr[25-31] are vaild condition bits */ fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - if ((fcsr & (1 << cc)) != 0) - target = pc + offset; - else - target = pc + 8; /* skip delay slot */ - + if (!strcasecmp (op_name, "BC1F") || + !strcasecmp (op_name, "BC1FL")) + { + if ((fcsr & (1 << cc)) == 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BC1T") || + !strcasecmp (op_name, "BC1TL")) + { + if ((fcsr & (1 << cc)) != 0) + target = pc + offset; + else + target = pc + 8; + } + Context context; if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) @@ -2926,109 +1752,19 @@ EmulateInstructionMIPS64::Emulate_BC1NEZ (llvm::MCInst& insn) return true; } +/* + Emulate MIPS-3D Branch instructions + BC1ANY2F, BC1ANY2T : Branch on Any of Two Floating Point Condition Codes False/True + BC1ANY4F, BC1ANY4T : 
Branch on Any of Four Floating Point Condition Codes False/True +*/ bool -EmulateInstructionMIPS64::Emulate_BC1ANY2F (llvm::MCInst& insn) +EmulateInstructionMIPS64::Emulate_3D_branch (llvm::MCInst& insn) { bool success = false; uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1ANY2F cc, offset - * condition <- (FPConditionCode(cc) == 0 - * || FPConditionCode(cc+1) == 0) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = (uint32_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - - /* if any one bit is 0 */ - if (((fcsr >> cc) & 3) != 3) - target = pc + offset; - else - target = pc + 8; - - Context context; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1ANY2T (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1ANY2T cc, offset - * condition <- (FPConditionCode(cc) == 1 - * || FPConditionCode(cc+1) == 1) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = (uint32_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - - /* if any one bit is 1 */ - if (((fcsr >> cc) & 3) != 0) - target = pc + offset; - else - target = pc + 8; - - Context context; + int64_t pc, offset, target = 0; + const char *op_name = m_insn_info->getName (insn.getOpcode ()); - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1ANY4F (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1ANY4F cc, offset - * condition <- (FPConditionCode(cc) == 0 - * || FPConditionCode(cc+1) == 0) - * || FPConditionCode(cc+2) == 0) - * || FPConditionCode(cc+3) == 0) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); offset = insn.getOperand(1).getImm(); @@ -3043,57 +1779,39 @@ EmulateInstructionMIPS64::Emulate_BC1ANY4F (llvm::MCInst& insn) /* fcsr[23], fcsr[25-31] are vaild condition bits */ fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); - /* if any one bit is 0 */ - if (((fcsr >> cc) & 0xf) != 0xf) - target = pc + offset; - else - target = pc + 8; - - Context context; - - if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) - return false; - - return true; -} - -bool -EmulateInstructionMIPS64::Emulate_BC1ANY4T (llvm::MCInst& insn) -{ - bool success = false; - uint32_t cc, fcsr; - int64_t target, pc, offset; - - /* - * BC1ANY4T cc, offset - * condition <- (FPConditionCode(cc) == 1 - * || 
FPConditionCode(cc+1) == 1) - * || FPConditionCode(cc+2) == 1) - * || FPConditionCode(cc+3) == 1) - * if condition then - * offset = sign_ext (offset) - * PC = PC + offset - */ - cc = m_reg_info->getEncodingValue (insn.getOperand(0).getReg()); - offset = insn.getOperand(1).getImm(); - - pc = ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_pc_mips64, 0, &success); - if (!success) - return false; - - fcsr = (uint32_t) ReadRegisterUnsigned (eRegisterKindDWARF, dwarf_fcsr_mips64, 0, &success); - if (!success) - return false; - - /* fcsr[23], fcsr[25-31] are vaild condition bits */ - fcsr = ((fcsr >> 24) & 0xfe) | ((fcsr >> 23) & 0x01); + if (!strcasecmp (op_name, "BC1ANY2F")) + { + /* if any one bit is 0 */ + if (((fcsr >> cc) & 3) != 3) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BC1ANY2T")) + { + /* if any one bit is 1 */ + if (((fcsr >> cc) & 3) != 0) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BC1ANY4F")) + { + /* if any one bit is 0 */ + if (((fcsr >> cc) & 0xf) != 0xf) + target = pc + offset; + else + target = pc + 8; + } + else if (!strcasecmp (op_name, "BC1ANY4T")) + { + /* if any one bit is 1 */ + if (((fcsr >> cc) & 0xf) != 0) + target = pc + offset; + else + target = pc + 8; + } - /* if any one bit is 1 */ - if (((fcsr >> cc) & 0xf) != 0) - target = pc + offset; - else - target = pc + 8; - Context context; if (!WriteRegisterUnsigned (context, eRegisterKindDWARF, dwarf_pc_mips64, target)) diff --git a/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h b/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h index e0b2079..4ca274c 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h @@ -133,121 +133,37 @@ protected: Emulate_LDST_Reg (llvm::MCInst& insn); bool - Emulate_BEQ (llvm::MCInst& insn); + Emulate_BXX_3ops (llvm::MCInst& insn); bool - Emulate_BNE (llvm::MCInst& insn); + Emulate_BXX_3ops_C (llvm::MCInst& insn); bool - Emulate_BEQL (llvm::MCInst& insn); + Emulate_BXX_2ops (llvm::MCInst& insn); bool - Emulate_BNEL (llvm::MCInst& insn); + Emulate_BXX_2ops_C (llvm::MCInst& insn); bool - Emulate_BGEZALL (llvm::MCInst& insn); + Emulate_Bcond_Link_C (llvm::MCInst& insn); bool - Emulate_BAL (llvm::MCInst& insn); - - bool - Emulate_BGEZAL (llvm::MCInst& insn); - - bool - Emulate_BALC (llvm::MCInst& insn); - - bool - Emulate_BC (llvm::MCInst& insn); - - bool - Emulate_BGEZ (llvm::MCInst& insn); - - bool - Emulate_BLEZALC (llvm::MCInst& insn); - - bool - Emulate_BGEZALC (llvm::MCInst& insn); - - bool - Emulate_BLTZALC (llvm::MCInst& insn); - - bool - Emulate_BGTZALC (llvm::MCInst& insn); - - bool - Emulate_BEQZALC (llvm::MCInst& insn); - - bool - Emulate_BNEZALC (llvm::MCInst& insn); - - bool - Emulate_BEQC (llvm::MCInst& insn); - - bool - Emulate_BNEC (llvm::MCInst& insn); - - bool - Emulate_BLTC (llvm::MCInst& insn); - - bool - Emulate_BGEC (llvm::MCInst& insn); - - bool - Emulate_BLTUC (llvm::MCInst& insn); - - bool - Emulate_BGEUC (llvm::MCInst& insn); - - bool - Emulate_BLTZC (llvm::MCInst& insn); - - bool - Emulate_BLEZC (llvm::MCInst& insn); + Emulate_Bcond_Link (llvm::MCInst& insn); bool - Emulate_BGEZC (llvm::MCInst& insn); + Emulate_FP_branch (llvm::MCInst& insn); bool - Emulate_BGTZC (llvm::MCInst& insn); + Emulate_3D_branch (llvm::MCInst& insn); bool - Emulate_BEQZC (llvm::MCInst& insn); - - 
bool - Emulate_BNEZC (llvm::MCInst& insn); - - bool - Emulate_BGEZL (llvm::MCInst& insn); - - bool - Emulate_BGTZ (llvm::MCInst& insn); - - bool - Emulate_BGTZL (llvm::MCInst& insn); - - bool - Emulate_BLEZ (llvm::MCInst& insn); - - bool - Emulate_BLEZL (llvm::MCInst& insn); - - bool - Emulate_BLTZ (llvm::MCInst& insn); - - bool - Emulate_BLTZAL (llvm::MCInst& insn); - - bool - Emulate_BLTZALL (llvm::MCInst& insn); - - bool - Emulate_BLTZL (llvm::MCInst& insn); + Emulate_BAL (llvm::MCInst& insn); bool - Emulate_BOVC (llvm::MCInst& insn); + Emulate_BALC (llvm::MCInst& insn); bool - Emulate_BNVC (llvm::MCInst& insn); + Emulate_BC (llvm::MCInst& insn); bool Emulate_J (llvm::MCInst& insn); @@ -268,36 +184,12 @@ protected: Emulate_JR (llvm::MCInst& insn); bool - Emulate_BC1F (llvm::MCInst& insn); - - bool - Emulate_BC1T (llvm::MCInst& insn); - - bool - Emulate_BC1FL (llvm::MCInst& insn); - - bool - Emulate_BC1TL (llvm::MCInst& insn); - - bool Emulate_BC1EQZ (llvm::MCInst& insn); bool Emulate_BC1NEZ (llvm::MCInst& insn); bool - Emulate_BC1ANY2F (llvm::MCInst& insn); - - bool - Emulate_BC1ANY2T (llvm::MCInst& insn); - - bool - Emulate_BC1ANY4F (llvm::MCInst& insn); - - bool - Emulate_BC1ANY4T (llvm::MCInst& insn); - - bool Emulate_BNZB (llvm::MCInst& insn); bool diff --git a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index a554aa5..09031e2 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -296,6 +296,22 @@ CPlusPlusLanguage::MethodName::GetQualifiers () return m_qualifiers; } +std::string +CPlusPlusLanguage::MethodName::GetScopeQualifiedName () +{ + if (!m_parsed) + Parse(); + if (m_basename.empty() || m_context.empty()) + return std::string(); + + std::string res; + res += m_context; + res += "::"; + res += m_basename; + + return res; +} + bool CPlusPlusLanguage::IsCPPMangledName (const char *name) { diff --git a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h index 1a8c0f6..f0fc07e 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -89,6 +89,9 @@ public: { return m_full; } + + std::string + GetScopeQualifiedName (); llvm::StringRef GetBasename (); diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp index a5fa004..8e2cfb5 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp @@ -319,46 +319,6 @@ ItaniumABILanguageRuntime::IsVTableName (const char *name) return false; } -static std::map<ConstString, std::vector<ConstString> >& -GetAlternateManglingPrefixes() -{ - static std::map<ConstString, std::vector<ConstString> > g_alternate_mangling_prefixes; - return g_alternate_mangling_prefixes; -} - - -size_t -ItaniumABILanguageRuntime::GetAlternateManglings(const ConstString &mangled, std::vector<ConstString> &alternates) -{ - if (!mangled) - return static_cast<size_t>(0); - - 
alternates.clear(); - const char *mangled_cstr = mangled.AsCString(); - std::map<ConstString, std::vector<ConstString> >& alternate_mangling_prefixes = GetAlternateManglingPrefixes(); - for (std::map<ConstString, std::vector<ConstString> >::iterator it = alternate_mangling_prefixes.begin(); - it != alternate_mangling_prefixes.end(); - ++it) - { - const char *prefix_cstr = it->first.AsCString(); - if (strncmp(mangled_cstr, prefix_cstr, strlen(prefix_cstr)) == 0) - { - const std::vector<ConstString> &alternate_prefixes = it->second; - for (size_t i = 0; i < alternate_prefixes.size(); ++i) - { - std::string alternate_mangling(alternate_prefixes[i].AsCString()); - alternate_mangling.append(mangled_cstr + strlen(prefix_cstr)); - - alternates.push_back(ConstString(alternate_mangling.c_str())); - } - - return alternates.size(); - } - } - - return static_cast<size_t>(0); -} - //------------------------------------------------------------------ // Static Functions //------------------------------------------------------------------ @@ -382,17 +342,6 @@ ItaniumABILanguageRuntime::Initialize() PluginManager::RegisterPlugin (GetPluginNameStatic(), "Itanium ABI for the C++ language", CreateInstance); - - // Alternate manglings for std::basic_string<...> - std::vector<ConstString> basic_string_alternates; - basic_string_alternates.push_back(ConstString("_ZNSs")); - basic_string_alternates.push_back(ConstString("_ZNKSs")); - std::map<ConstString, std::vector<ConstString> >& alternate_mangling_prefixes = GetAlternateManglingPrefixes(); - - alternate_mangling_prefixes[ConstString("_ZNSbIcSt17char_traits<char>St15allocator<char>E")] = - basic_string_alternates; - alternate_mangling_prefixes[ConstString("_ZNKSbIcSt17char_traits<char>St15allocator<char>E")] = - basic_string_alternates; } void diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h index 519a3ce..c06b986 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h @@ -80,9 +80,6 @@ namespace lldb_private { lldb::SearchFilterSP CreateExceptionSearchFilter() override; - size_t - GetAlternateManglings(const ConstString &mangled, std::vector<ConstString> &alternates) override; - //------------------------------------------------------------------ // PluginInterface protocol //------------------------------------------------------------------ diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp index cdb9525..2810b24 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp @@ -69,7 +69,20 @@ AppleObjCRuntime::GetObjectDescription (Stream &str, ValueObject &valobj) if (!valobj.ResolveValue(val.GetScalar())) return false; - ExecutionContext exe_ctx (valobj.GetExecutionContextRef()); + // Value Objects may not have a process in their ExecutionContextRef. But we need to have one + // in the ref we pass down to eventually call description. Get it from the target if it isn't + // present. 
+ ExecutionContext exe_ctx; + if (valobj.GetProcessSP()) + { + exe_ctx = ExecutionContext(valobj.GetExecutionContextRef()); + } + else + { + exe_ctx.SetContext(valobj.GetTargetSP(), true); + if (!exe_ctx.HasProcessScope()) + return false; + } return GetObjectDescription(str, val, exe_ctx.GetBestExecutionContextScope()); } diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index 8c485d9..5844494 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -405,9 +405,18 @@ AppleObjCRuntimeV2::GetDynamicTypeAndAddress (ValueObject &in_value, Address &address, Value::ValueType &value_type) { - // The Runtime is attached to a particular process, you shouldn't pass in a value from another process. - assert (in_value.GetProcessSP().get() == m_process); + // We should never get here with a null process... assert (m_process != NULL); + + // The Runtime is attached to a particular process, you shouldn't pass in a value from another process. + // Note, however, the process might be NULL (e.g. if the value was made with SBTarget::EvaluateExpression...) + // in which case it is sufficient if the targets match: + + Process *process = in_value.GetProcessSP().get(); + if (process) + assert (process == m_process); + else + assert (in_value.GetTargetSP().get() == m_process->CalculateTarget().get()); class_type_or_name.Clear(); value_type = Value::ValueType::eValueTypeScalar; diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp index 5d82ded..8e5d31b 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp @@ -235,16 +235,28 @@ struct RenderScriptRuntime::AllocationDetails } }; - // Header for reading and writing allocation contents - // to a binary file. + // The FileHeader struct specifies the header we use for writing allocations to a binary file. + // Our format begins with the ASCII characters "RSAD", identifying the file as an allocation dump. + // Member variables dims and hdr_size are then written consecutively, immediately followed by an instance of + // the ElementHeader struct. Because Elements can contain subelements, there may be more than one instance + // of the ElementHeader struct, with the first instance being the root element and the other instances being + // the root's descendants. To identify which instances are an ElementHeader's children, each struct + // is immediately followed by a sequence of consecutive offsets to the start of its child structs. + // These offsets are 4 bytes in size, and the 0 offset signifies no more children. 
struct FileHeader { uint8_t ident[4]; // ASCII 'RSAD' identifying the file - uint16_t hdr_size; // Header size in bytes, for backwards compatability - uint16_t type; // DataType enum - uint32_t kind; // DataKind enum uint32_t dims[3]; // Dimensions - uint32_t element_size; // Size of a single element, including padding + uint16_t hdr_size; // Header size in bytes, including all element headers + }; + + struct ElementHeader + { + uint16_t type; // DataType enum + uint32_t kind; // DataKind enum + uint32_t element_size; // Size of a single element, including padding + uint16_t vector_size; // Vector width + uint32_t array_size; // Number of elements in array }; // Monotonically increasing from 1 @@ -286,7 +298,6 @@ struct RenderScriptRuntime::AllocationDetails } }; - const ConstString & RenderScriptRuntime::Element::GetFallbackStructName() { @@ -2084,37 +2095,62 @@ RenderScriptRuntime::LoadAllocation(Stream &strm, const uint32_t alloc_id, const // Cast start of buffer to FileHeader and use pointer to read metadata void* file_buffer = data_sp->GetBytes(); - const AllocationDetails::FileHeader* head = static_cast<AllocationDetails::FileHeader*>(file_buffer); + if (file_buffer == NULL || data_sp->GetByteSize() < + (sizeof(AllocationDetails::FileHeader) + sizeof(AllocationDetails::ElementHeader))) + { + strm.Printf("Error: File %s does not contain enough data for header", filename); + strm.EOL(); + return false; + } + const AllocationDetails::FileHeader* file_header = static_cast<AllocationDetails::FileHeader*>(file_buffer); - // Advance buffer past header - file_buffer = static_cast<uint8_t*>(file_buffer) + head->hdr_size; + // Check file starts with ascii characters "RSAD" + if (file_header->ident[0] != 'R' || file_header->ident[1] != 'S' || file_header->ident[2] != 'A' + || file_header->ident[3] != 'D') + { + strm.Printf("Error: File doesn't contain identifier for an RS allocation dump. 
Are you sure this is the correct file?"); + strm.EOL(); + return false; + } + + // Look at the type of the root element in the header + AllocationDetails::ElementHeader root_element_header; + memcpy(&root_element_header, static_cast<uint8_t*>(file_buffer) + sizeof(AllocationDetails::FileHeader), + sizeof(AllocationDetails::ElementHeader)); if (log) log->Printf("RenderScriptRuntime::LoadAllocation - header type %u, element size %u", - head->type, head->element_size); + root_element_header.type, root_element_header.element_size); // Check if the target allocation and file both have the same number of bytes for an Element - if (*alloc->element.datum_size.get() != head->element_size) + if (*alloc->element.datum_size.get() != root_element_header.element_size) { strm.Printf("Warning: Mismatched Element sizes - file %u bytes, allocation %u bytes", - head->element_size, *alloc->element.datum_size.get()); + root_element_header.element_size, *alloc->element.datum_size.get()); strm.EOL(); } - // Check if the target allocation and file both have the same integral type - const unsigned int type = static_cast<unsigned int>(*alloc->element.type.get()); - if (type != head->type) + // Check if the target allocation and file both have the same type + const unsigned int alloc_type = static_cast<unsigned int>(*alloc->element.type.get()); + const unsigned int file_type = root_element_header.type; + + if (file_type > Element::RS_TYPE_FONT) + { + strm.Printf("Warning: File has unknown allocation type"); + strm.EOL(); + } + else if (alloc_type != file_type) { // Enum value isn't monotonous, so doesn't always index RsDataTypeToString array - unsigned int printable_target_type_index = type; - unsigned int printable_head_type_index = head->type; - if (type >= Element::RS_TYPE_ELEMENT && type <= Element::RS_TYPE_FONT) + unsigned int printable_target_type_index = alloc_type; + unsigned int printable_head_type_index = file_type; + if (alloc_type >= Element::RS_TYPE_ELEMENT && alloc_type <= Element::RS_TYPE_FONT) printable_target_type_index = static_cast<Element::DataType>( - (type - Element::RS_TYPE_ELEMENT) + Element::RS_TYPE_MATRIX_2X2 + 1); + (alloc_type - Element::RS_TYPE_ELEMENT) + Element::RS_TYPE_MATRIX_2X2 + 1); - if (head->type >= Element::RS_TYPE_ELEMENT && head->type <= Element::RS_TYPE_FONT) + if (file_type >= Element::RS_TYPE_ELEMENT && file_type <= Element::RS_TYPE_FONT) printable_head_type_index = static_cast<Element::DataType>( - (head->type - Element::RS_TYPE_ELEMENT) + Element::RS_TYPE_MATRIX_2X2 + 1); + (file_type - Element::RS_TYPE_ELEMENT) + Element::RS_TYPE_MATRIX_2X2 + 1); const char* file_type_cstr = AllocationDetails::RsDataTypeToString[printable_head_type_index][0]; const char* target_type_cstr = AllocationDetails::RsDataTypeToString[printable_target_type_index][0]; @@ -2124,8 +2160,11 @@ RenderScriptRuntime::LoadAllocation(Stream &strm, const uint32_t alloc_id, const strm.EOL(); } + // Advance buffer past header + file_buffer = static_cast<uint8_t*>(file_buffer) + file_header->hdr_size; + // Calculate size of allocation data in file - size_t length = data_sp->GetByteSize() - head->hdr_size; + size_t length = data_sp->GetByteSize() - file_header->hdr_size; // Check if the target allocation and file both have the same total data size. 
const unsigned int alloc_size = *alloc->size.get(); @@ -2154,6 +2193,62 @@ RenderScriptRuntime::LoadAllocation(Stream &strm, const uint32_t alloc_id, const return true; } +// Function takes as parameters a byte buffer, which will eventually be written to file as the element header, +// an offset into that buffer, and an Element that will be saved into the buffer at the parametrised offset. +// Return value is the new offset after writing the element into the buffer. +// Elements are saved to the file as the ElementHeader struct followed by offsets to the structs of all the element's children. +size_t +RenderScriptRuntime::PopulateElementHeaders(const std::shared_ptr<uint8_t> header_buffer, size_t offset, const Element& elem) +{ + // File struct for an element header with all the relevant details copied from elem. + // We assume members are valid already. + AllocationDetails::ElementHeader elem_header; + elem_header.type = *elem.type.get(); + elem_header.kind = *elem.type_kind.get(); + elem_header.element_size = *elem.datum_size.get(); + elem_header.vector_size = *elem.type_vec_size.get(); + elem_header.array_size = elem.array_size.isValid() ? *elem.array_size.get() : 0; + const size_t elem_header_size = sizeof(AllocationDetails::ElementHeader); + + // Copy struct into buffer and advance offset + // We assume that header_buffer has been checked for NULL before this method is called + memcpy(header_buffer.get() + offset, &elem_header, elem_header_size); + offset += elem_header_size; + + // Starting offset of child ElementHeader struct + size_t child_offset = offset + ((elem.children.size() + 1) * sizeof(uint32_t)); + for (const RenderScriptRuntime::Element& child : elem.children) + { + // Recursively populate the buffer with the element header structs of children. + // Then save the offsets where they were set after the parent element header. + memcpy(header_buffer.get() + offset, &child_offset, sizeof(uint32_t)); + offset += sizeof(uint32_t); + + child_offset = PopulateElementHeaders(header_buffer, child_offset, child); + } + + // Zero indicates no more children + memset(header_buffer.get() + offset, 0, sizeof(uint32_t)); + + return child_offset; +} + +// Given an Element object this function returns the total size needed in the file header to store the element's details. +// Taking into account the size of the element header struct, plus the offsets to all the element's children. +// Function is recursive so that the size of all descendants is taken into account. +size_t +RenderScriptRuntime::CalculateElementHeaderSize(const Element& elem) +{ + size_t size = (elem.children.size() + 1) * sizeof(uint32_t); // Offsets to children plus zero terminator + size += sizeof(AllocationDetails::ElementHeader); // Size of header struct with type details + + // Calculate recursively for all descendants + for (const Element& child : elem.children) + size += CalculateElementHeaderSize(child); + + return size; +} + // Function copies allocation contents into a binary file. // This file can then be loaded later into a different allocation. // There is a header, FileHeader, before the allocation data containing meta-data. 
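[Editor's note] The two helpers above define a simple recursive layout: each element's header is written as an ElementHeader struct immediately followed by 4-byte offsets to its children's headers, with a zero offset terminating the list, and the size calculation walks the same subtree so hdr_size always matches what gets written. The following standalone sketch is not part of the patch; ElemHeader, Elem, HeaderSize and Populate are simplified stand-ins for AllocationDetails::ElementHeader, Element, CalculateElementHeaderSize and PopulateElementHeaders, shown here only to make the two traversals easy to follow in lockstep:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Simplified stand-ins for AllocationDetails::ElementHeader and Element.
struct ElemHeader { uint16_t type; uint32_t kind; uint32_t element_size; uint16_t vector_size; uint32_t array_size; };
struct Elem { ElemHeader hdr; std::vector<Elem> children; };

// Mirrors CalculateElementHeaderSize: one header per node, plus one 4-byte
// offset slot per child and a 4-byte zero terminator, summed over the subtree.
size_t HeaderSize(const Elem &e)
{
    size_t size = sizeof(ElemHeader) + (e.children.size() + 1) * sizeof(uint32_t);
    for (const Elem &c : e.children)
        size += HeaderSize(c);
    return size;
}

// Mirrors PopulateElementHeaders: write this node's header, then the offsets of
// its children's headers, then recurse into each child; returns the next free
// offset so the caller knows where the following subtree begins.
size_t Populate(uint8_t *buf, size_t offset, const Elem &e)
{
    memcpy(buf + offset, &e.hdr, sizeof(ElemHeader));
    offset += sizeof(ElemHeader);

    // The first child's header starts right after this node's offset list.
    size_t child_offset = offset + (e.children.size() + 1) * sizeof(uint32_t);
    for (const Elem &c : e.children)
    {
        uint32_t off = static_cast<uint32_t>(child_offset);
        memcpy(buf + offset, &off, sizeof(uint32_t));
        offset += sizeof(uint32_t);
        child_offset = Populate(buf, child_offset, c);
    }
    memset(buf + offset, 0, sizeof(uint32_t)); // zero offset: no more children
    return child_offset;
}

For a root element with two scalar children this yields three ElementHeader records and five offset slots (the root's two child offsets plus one zero terminator per node), which is exactly the byte count HeaderSize reports.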
@@ -2209,17 +2304,44 @@ RenderScriptRuntime::SaveAllocation(Stream &strm, const uint32_t alloc_id, const // Create the file header AllocationDetails::FileHeader head; head.ident[0] = 'R'; head.ident[1] = 'S'; head.ident[2] = 'A'; head.ident[3] = 'D'; - head.hdr_size = static_cast<uint16_t>(sizeof(AllocationDetails::FileHeader)); - head.type = static_cast<uint16_t>(*alloc->element.type.get()); - head.kind = static_cast<uint32_t>(*alloc->element.type_kind.get()); head.dims[0] = static_cast<uint32_t>(alloc->dimension.get()->dim_1); head.dims[1] = static_cast<uint32_t>(alloc->dimension.get()->dim_2); head.dims[2] = static_cast<uint32_t>(alloc->dimension.get()->dim_3); - head.element_size = static_cast<uint32_t>(*alloc->element.datum_size.get()); + + const size_t element_header_size = CalculateElementHeaderSize(alloc->element); + assert((sizeof(AllocationDetails::FileHeader) + element_header_size) < UINT16_MAX && "Element header too large"); + head.hdr_size = static_cast<uint16_t>(sizeof(AllocationDetails::FileHeader) + element_header_size); // Write the file header size_t num_bytes = sizeof(AllocationDetails::FileHeader); - Error err = file.Write(static_cast<const void*>(&head), num_bytes); + if (log) + log->Printf("RenderScriptRuntime::SaveAllocation - Writing File Header, 0x%zX bytes", num_bytes); + + Error err = file.Write(&head, num_bytes); + if (!err.Success()) + { + strm.Printf("Error: '%s' when writing to file '%s'", err.AsCString(), filename); + strm.EOL(); + return false; + } + + // Create the headers describing the element type of the allocation. + std::shared_ptr<uint8_t> element_header_buffer(new uint8_t[element_header_size]); + if (element_header_buffer == nullptr) + { + strm.Printf("Internal Error: Couldn't allocate %zu bytes on the heap", element_header_size); + strm.EOL(); + return false; + } + + PopulateElementHeaders(element_header_buffer, 0, alloc->element); + + // Write headers for allocation element type to file + num_bytes = element_header_size; + if (log) + log->Printf("RenderScriptRuntime::SaveAllocation - Writing Element Headers, 0x%zX bytes", num_bytes); + + err = file.Write(element_header_buffer.get(), num_bytes); if (!err.Success()) { strm.Printf("Error: '%s' when writing to file '%s'", err.AsCString(), filename); @@ -2230,7 +2352,7 @@ RenderScriptRuntime::SaveAllocation(Stream &strm, const uint32_t alloc_id, const // Write allocation data to file num_bytes = static_cast<size_t>(*alloc->size.get()); if (log) - log->Printf("RenderScriptRuntime::SaveAllocation - Writing 0x%" PRIx64 " bytes from %p", (uint64_t) num_bytes, (void*) buffer.get()); + log->Printf("RenderScriptRuntime::SaveAllocation - Writing 0x%zX bytes", num_bytes); err = file.Write(buffer.get(), num_bytes); if (!err.Success()) diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h index 0ca268c..2fe4390 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h @@ -207,10 +207,6 @@ public: void Status(Stream &strm) const; - size_t GetAlternateManglings(const ConstString &mangled, std::vector<ConstString> &alternates) override { - return static_cast<size_t>(0); - } - void ModulesDidLoad(const ModuleList &module_list) override; bool 
LoadAllocation(Stream &strm, const uint32_t alloc_id, const char* filename, StackFrame* frame_ptr); @@ -335,6 +331,9 @@ private: static bool GetFrameVarAsUnsigned(const lldb::StackFrameSP, const char* var_name, uint64_t& val); void FindStructTypeName(Element& elem, StackFrame* frame_ptr); + size_t PopulateElementHeaders(const std::shared_ptr<uint8_t> header_buffer, size_t offset, const Element& elem); + size_t CalculateElementHeaderSize(const Element& elem); + // // Helper functions for jitting the runtime // diff --git a/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index d90b253..4777f09 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -330,15 +330,15 @@ mipsVariantFromElfFlags(const elf::elf_word e_flags, uint32_t endian) { case llvm::ELF::EF_MIPS_ARCH_1: case llvm::ELF::EF_MIPS_ARCH_2: - case llvm::ELF::EF_MIPS_ARCH_3: - case llvm::ELF::EF_MIPS_ARCH_4: - case llvm::ELF::EF_MIPS_ARCH_5: case llvm::ELF::EF_MIPS_ARCH_32: return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32el : ArchSpec::eMIPSSubType_mips32; case llvm::ELF::EF_MIPS_ARCH_32R2: return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32r2el : ArchSpec::eMIPSSubType_mips32r2; case llvm::ELF::EF_MIPS_ARCH_32R6: return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32r6el : ArchSpec::eMIPSSubType_mips32r6; + case llvm::ELF::EF_MIPS_ARCH_3: + case llvm::ELF::EF_MIPS_ARCH_4: + case llvm::ELF::EF_MIPS_ARCH_5: case llvm::ELF::EF_MIPS_ARCH_64: return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips64el : ArchSpec::eMIPSSubType_mips64; case llvm::ELF::EF_MIPS_ARCH_64R2: @@ -954,9 +954,6 @@ ObjectFileELF::GetAddressByteSize() const return m_data.GetAddressByteSize(); } -// Top 16 bits of the `Symbol` flags are available. -#define ARM_ELF_SYM_IS_THUMB (1 << 16) - AddressClass ObjectFileELF::GetAddressClass (addr_t file_addr) { @@ -2195,7 +2192,6 @@ ObjectFileELF::ParseSymbols (Symtab *symtab, // symbol.st_value to produce the final symbol_value // that we store in the symtab. 
symbol_value_offset = -1; - additional_flags = ARM_ELF_SYM_IS_THUMB; m_address_class_map[symbol.st_value^1] = eAddressClassCodeAlternateISA; } else diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp index bd3978c..ebeba8c 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp @@ -72,6 +72,7 @@ lldb_private::InferiorCallMmap (Process *process, options.SetTryAllThreads(true); options.SetDebug (false); options.SetTimeoutUsec(500000); + options.SetTrapExceptions(false); addr_t prot_arg, flags_arg = 0; if (prot == eMmapProtNone) @@ -172,6 +173,7 @@ lldb_private::InferiorCallMunmap (Process *process, options.SetTryAllThreads(true); options.SetDebug (false); options.SetTimeoutUsec(500000); + options.SetTrapExceptions(false); AddressRange munmap_range; if (sc.GetAddressRange(range_scope, 0, use_inline_block_range, munmap_range)) @@ -214,7 +216,8 @@ lldb_private::InferiorCallMunmap (Process *process, bool lldb_private::InferiorCall (Process *process, const Address *address, - addr_t &returned_func) + addr_t &returned_func, + bool trap_exceptions) { Thread *thread = process->GetThreadList().GetSelectedThread().get(); if (thread == NULL || address == NULL) @@ -227,6 +230,7 @@ lldb_private::InferiorCall (Process *process, options.SetTryAllThreads(true); options.SetDebug (false); options.SetTimeoutUsec(500000); + options.SetTrapExceptions(trap_exceptions); ClangASTContext *clang_ast_context = process->GetTarget().GetScratchClangASTContext(); CompilerType clang_void_ptr_type = clang_ast_context->GetBasicType(eBasicTypeVoid).GetPointerType(); diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.h b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.h index e56e95c..d10e849 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.h @@ -31,7 +31,8 @@ bool InferiorCallMmap(Process *proc, lldb::addr_t &allocated_addr, bool InferiorCallMunmap(Process *proc, lldb::addr_t addr, lldb::addr_t length); -bool InferiorCall(Process *proc, const Address *address, lldb::addr_t &returned_func); +bool InferiorCall(Process *proc, const Address *address, lldb::addr_t &returned_func, + bool trap_exceptions = false); } // namespace lldb_private diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp index 278a1d5..efda0ed 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp @@ -265,9 +265,32 @@ RegisterContextLLDB::InitializeZerothFrame() if (!ReadCFAValueForRow (row_register_kind, active_row, m_cfa)) { - UnwindLogMsg ("could not read CFA register for this frame."); - m_frame_type = eNotAValidFrame; - return; + // Try the fallback unwind plan since the + // full unwind plan failed.
+ FuncUnwindersSP func_unwinders_sp; + UnwindPlanSP call_site_unwind_plan; + bool cfa_status = false; + + if (m_sym_ctx_valid) + { + func_unwinders_sp = pc_module_sp->GetObjectFile()->GetUnwindTable().GetFuncUnwindersContainingAddress (m_current_pc, m_sym_ctx); + } + + if(func_unwinders_sp.get() != nullptr) + call_site_unwind_plan = func_unwinders_sp->GetUnwindPlanAtCallSite(process->GetTarget(), m_current_offset_backed_up_one); + + if (call_site_unwind_plan.get() != nullptr) + { + m_fallback_unwind_plan_sp = call_site_unwind_plan; + if(TryFallbackUnwindPlan()) + cfa_status = true; + } + if (!cfa_status) + { + UnwindLogMsg ("could not read CFA value for first frame."); + m_frame_type = eNotAValidFrame; + return; + } } UnwindLogMsg ("initialized frame current pc is 0x%" PRIx64 " cfa is 0x%" PRIx64 " using %s UnwindPlan", diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 5c7f6ca..c0ea9cc 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -101,6 +101,8 @@ GDBRemoteCommunicationClient::GDBRemoteCommunicationClient() : m_supports_QEnvironment (true), m_supports_QEnvironmentHexEncoded (true), m_supports_qSymbol (true), + m_qSymbol_requests_done (false), + m_supports_qModuleInfo (true), m_supports_jThreadsInfo (true), m_curr_pid (LLDB_INVALID_PROCESS_ID), m_curr_tid (LLDB_INVALID_THREAD_ID), @@ -376,6 +378,8 @@ GDBRemoteCommunicationClient::ResetDiscoverableSettings (bool did_exec) m_supports_QEnvironment = true; m_supports_QEnvironmentHexEncoded = true; m_supports_qSymbol = true; + m_qSymbol_requests_done = false; + m_supports_qModuleInfo = true; m_host_arch.Clear(); m_os_version_major = UINT32_MAX; m_os_version_minor = UINT32_MAX; @@ -4284,6 +4288,9 @@ GDBRemoteCommunicationClient::GetModuleInfo (const FileSpec& module_file_spec, const lldb_private::ArchSpec& arch_spec, ModuleSpec &module_spec) { + if (!m_supports_qModuleInfo) + return false; + std::string module_path = module_file_spec.GetPath (false); if (module_path.empty ()) return false; @@ -4299,8 +4306,14 @@ GDBRemoteCommunicationClient::GetModuleInfo (const FileSpec& module_file_spec, if (SendPacketAndWaitForResponse (packet.GetData(), packet.GetSize(), response, false) != PacketResult::Success) return false; - if (response.IsErrorResponse () || response.IsUnsupportedResponse ()) + if (response.IsErrorResponse ()) + return false; + + if (response.IsUnsupportedResponse ()) + { + m_supports_qModuleInfo = false; return false; + } std::string name; std::string value; @@ -4432,11 +4445,42 @@ GDBRemoteCommunicationClient::ReadExtFeature (const lldb_private::ConstString ob // qSymbol:<sym_name> The target requests the value of symbol sym_name (hex encoded). // LLDB may provide the value by sending another qSymbol packet // in the form of "qSymbol:<sym_value>:<sym_name>". +// +// Three examples: +// +// lldb sends: qSymbol:: +// lldb receives: OK +// Remote gdb stub does not need to know the addresses of any symbols, lldb does not +// need to ask again in this session. +// +// lldb sends: qSymbol:: +// lldb receives: qSymbol:64697370617463685f71756575655f6f666673657473 +// lldb sends: qSymbol::64697370617463685f71756575655f6f666673657473 +// lldb receives: OK +// Remote gdb stub asks for address of 'dispatch_queue_offsets'.
lldb does not know +// the address at this time. lldb needs to send qSymbol:: again when it has more +// solibs loaded. +// +// lldb sends: qSymbol:: +// lldb receives: qSymbol:64697370617463685f71756575655f6f666673657473 +// lldb sends: qSymbol:2bc97554:64697370617463685f71756575655f6f666673657473 +// lldb receives: OK +// Remote gdb stub asks for address of 'dispatch_queue_offsets'. lldb says that it +// is at address 0x2bc97554. Remote gdb stub sends 'OK' indicating that it does not +// need any more symbols. lldb does not need to ask again in this session. void GDBRemoteCommunicationClient::ServeSymbolLookups(lldb_private::Process *process) { - if (m_supports_qSymbol) + // Set to true once we've resolved a symbol to an address for the remote stub. + // If we get an 'OK' response after this, the remote stub doesn't need any more + // symbols and we can stop asking. + bool symbol_response_provided = false; + + // Is this the initial qSymbol:: packet? + bool first_qsymbol_query = true; + + if (m_supports_qSymbol && m_qSymbol_requests_done == false) { Mutex::Locker locker; if (GetSequenceMutex(locker, "GDBRemoteCommunicationClient::ServeSymbolLookups() failed due to not getting the sequence mutex")) @@ -4448,9 +4492,15 @@ GDBRemoteCommunicationClient::ServeSymbolLookups(lldb_private::Process *process) { if (response.IsOKResponse()) { + if (symbol_response_provided || first_qsymbol_query) + { + m_qSymbol_requests_done = true; + } + // We are done serving symbols requests return; } + first_qsymbol_query = false; if (response.IsUnsupportedResponse()) { @@ -4530,7 +4580,14 @@ GDBRemoteCommunicationClient::ServeSymbolLookups(lldb_private::Process *process) packet.Clear(); packet.PutCString("qSymbol:"); if (symbol_load_addr != LLDB_INVALID_ADDRESS) + { packet.Printf("%" PRIx64, symbol_load_addr); + symbol_response_provided = true; + } + else + { + symbol_response_provided = false; + } packet.PutCString(":"); packet.PutBytesAsRawHex8(symbol_name.data(), symbol_name.size()); continue; // go back to the while loop and send "packet" and wait for another response diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h index d2df214..311b0f3 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h @@ -619,6 +619,8 @@ protected: m_supports_QEnvironment:1, m_supports_QEnvironmentHexEncoded:1, m_supports_qSymbol:1, + m_qSymbol_requests_done:1, + m_supports_qModuleInfo:1, m_supports_jThreadsInfo:1; lldb::pid_t m_curr_pid; diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index bb528eb..6bee4e1 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -172,118 +172,6 @@ namespace { } // anonymous namespace end -class ProcessGDBRemote::GDBLoadedModuleInfoList -{ -public: - - class LoadedModuleInfo - { - public: - - enum e_data_point - { - e_has_name = 0, - e_has_base , - e_has_dynamic , - e_has_link_map , - e_num - }; - - LoadedModuleInfo () - { - for (uint32_t i = 0; i < e_num; ++i) - m_has[i] = false; - } - - void set_name (const std::string & name) - { - m_name = name; - m_has[e_has_name] = 
true; - } - bool get_name (std::string & out) const - { - out = m_name; - return m_has[e_has_name]; - } - - void set_base (const lldb::addr_t base) - { - m_base = base; - m_has[e_has_base] = true; - } - bool get_base (lldb::addr_t & out) const - { - out = m_base; - return m_has[e_has_base]; - } - - void set_base_is_offset (bool is_offset) - { - m_base_is_offset = is_offset; - } - bool get_base_is_offset(bool & out) const - { - out = m_base_is_offset; - return m_has[e_has_base]; - } - - void set_link_map (const lldb::addr_t addr) - { - m_link_map = addr; - m_has[e_has_link_map] = true; - } - bool get_link_map (lldb::addr_t & out) const - { - out = m_link_map; - return m_has[e_has_link_map]; - } - - void set_dynamic (const lldb::addr_t addr) - { - m_dynamic = addr; - m_has[e_has_dynamic] = true; - } - bool get_dynamic (lldb::addr_t & out) const - { - out = m_dynamic; - return m_has[e_has_dynamic]; - } - - bool has_info (e_data_point datum) - { - assert (datum < e_num); - return m_has[datum]; - } - - protected: - - bool m_has[e_num]; - std::string m_name; - lldb::addr_t m_link_map; - lldb::addr_t m_base; - bool m_base_is_offset; - lldb::addr_t m_dynamic; - }; - - GDBLoadedModuleInfoList () - : m_list () - , m_link_map (LLDB_INVALID_ADDRESS) - {} - - void add (const LoadedModuleInfo & mod) - { - m_list.push_back (mod); - } - - void clear () - { - m_list.clear (); - } - - std::vector<LoadedModuleInfo> m_list; - lldb::addr_t m_link_map; -}; - // TODO Randomly assigning a port is unsafe. We should get an unused // ephemeral port from the kernel and make sure we reserve it before passing // it to debugserver. @@ -2033,6 +1921,8 @@ ProcessGDBRemote::SetThreadStopInfo (lldb::tid_t tid, const std::vector<addr_t> &exc_data, addr_t thread_dispatch_qaddr, bool queue_vars_valid, // Set to true if queue_name, queue_kind and queue_serial are valid + LazyBool associated_with_dispatch_queue, + addr_t dispatch_queue_t, std::string &queue_name, QueueKind queue_kind, uint64_t queue_serial) @@ -2073,10 +1963,15 @@ ProcessGDBRemote::SetThreadStopInfo (lldb::tid_t tid, gdb_thread->SetThreadDispatchQAddr (thread_dispatch_qaddr); // Check if the GDB server was able to provide the queue name, kind and serial number if (queue_vars_valid) - gdb_thread->SetQueueInfo(std::move(queue_name), queue_kind, queue_serial); + gdb_thread->SetQueueInfo(std::move(queue_name), queue_kind, queue_serial, dispatch_queue_t, associated_with_dispatch_queue); else gdb_thread->ClearQueueInfo(); + gdb_thread->SetAssociatedWithLibdispatchQueue (associated_with_dispatch_queue); + + if (dispatch_queue_t != LLDB_INVALID_ADDRESS) + gdb_thread->SetQueueLibdispatchQueueAddress (dispatch_queue_t); + // Make sure we update our thread stop reason just once if (!thread_sp->StopInfoIsUpToDate()) { @@ -2247,9 +2142,11 @@ ProcessGDBRemote::SetThreadStopInfo (StructuredData::Dictionary *thread_dict) static ConstString g_key_metype("metype"); static ConstString g_key_medata("medata"); static ConstString g_key_qaddr("qaddr"); + static ConstString g_key_dispatch_queue_t("dispatch_queue_t"); + static ConstString g_key_associated_with_dispatch_queue("associated_with_dispatch_queue"); static ConstString g_key_queue_name("qname"); static ConstString g_key_queue_kind("qkind"); - static ConstString g_key_queue_serial("qserial"); + static ConstString g_key_queue_serial_number("qserialnum"); static ConstString g_key_registers("registers"); static ConstString g_key_memory("memory"); static ConstString g_key_address("address"); @@ -2269,9 +2166,11 @@ 
ProcessGDBRemote::SetThreadStopInfo (StructuredData::Dictionary *thread_dict) addr_t thread_dispatch_qaddr = LLDB_INVALID_ADDRESS; ExpeditedRegisterMap expedited_register_map; bool queue_vars_valid = false; + addr_t dispatch_queue_t = LLDB_INVALID_ADDRESS; + LazyBool associated_with_dispatch_queue = eLazyBoolCalculate; std::string queue_name; QueueKind queue_kind = eQueueKindUnknown; - uint64_t queue_serial = 0; + uint64_t queue_serial_number = 0; // Iterate through all of the thread dictionary key/value pairs from the structured data dictionary thread_dict->ForEach([this, @@ -2285,9 +2184,11 @@ ProcessGDBRemote::SetThreadStopInfo (StructuredData::Dictionary *thread_dict) &exc_data, &thread_dispatch_qaddr, &queue_vars_valid, + &associated_with_dispatch_queue, + &dispatch_queue_t, &queue_name, &queue_kind, - &queue_serial] + &queue_serial_number] (ConstString key, StructuredData::Object* object) -> bool { if (key == g_key_tid) @@ -2339,12 +2240,27 @@ ProcessGDBRemote::SetThreadStopInfo (StructuredData::Dictionary *thread_dict) queue_kind = eQueueKindConcurrent; } } - else if (key == g_key_queue_serial) + else if (key == g_key_queue_serial_number) { - queue_serial = object->GetIntegerValue(0); - if (queue_serial != 0) + queue_serial_number = object->GetIntegerValue(0); + if (queue_serial_number != 0) queue_vars_valid = true; } + else if (key == g_key_dispatch_queue_t) + { + dispatch_queue_t = object->GetIntegerValue(0); + if (dispatch_queue_t != 0 && dispatch_queue_t != LLDB_INVALID_ADDRESS) + queue_vars_valid = true; + } + else if (key == g_key_associated_with_dispatch_queue) + { + queue_vars_valid = true; + bool associated = object->GetBooleanValue (); + if (associated) + associated_with_dispatch_queue = eLazyBoolYes; + else + associated_with_dispatch_queue = eLazyBoolNo; + } else if (key == g_key_reason) { reason = object->GetStringValue(); @@ -2415,9 +2331,11 @@ ProcessGDBRemote::SetThreadStopInfo (StructuredData::Dictionary *thread_dict) exc_data, thread_dispatch_qaddr, queue_vars_valid, + associated_with_dispatch_queue, + dispatch_queue_t, queue_name, queue_kind, - queue_serial); + queue_serial_number); } StateType @@ -2460,9 +2378,11 @@ ProcessGDBRemote::SetThreadStopInfo (StringExtractor& stop_packet) std::vector<addr_t> exc_data; addr_t thread_dispatch_qaddr = LLDB_INVALID_ADDRESS; bool queue_vars_valid = false; // says if locals below that start with "queue_" are valid + addr_t dispatch_queue_t = LLDB_INVALID_ADDRESS; + LazyBool associated_with_dispatch_queue = eLazyBoolCalculate; std::string queue_name; QueueKind queue_kind = eQueueKindUnknown; - uint64_t queue_serial = 0; + uint64_t queue_serial_number = 0; ExpeditedRegisterMap expedited_register_map; while (stop_packet.GetNameColonValue(key, value)) { @@ -2553,6 +2473,11 @@ ProcessGDBRemote::SetThreadStopInfo (StringExtractor& stop_packet) { thread_dispatch_qaddr = StringConvert::ToUInt64 (value.c_str(), 0, 16); } + else if (key.compare("dispatch_queue_t") == 0) + { + queue_vars_valid = true; + dispatch_queue_t = StringConvert::ToUInt64 (value.c_str(), 0, 16); + } else if (key.compare("qname") == 0) { queue_vars_valid = true; @@ -2576,10 +2501,10 @@ ProcessGDBRemote::SetThreadStopInfo (StringExtractor& stop_packet) queue_kind = eQueueKindConcurrent; } } - else if (key.compare("qserial") == 0) + else if (key.compare("qserialnum") == 0) { - queue_serial = StringConvert::ToUInt64 (value.c_str(), 0, 0); - if (queue_serial != 0) + queue_serial_number = StringConvert::ToUInt64 (value.c_str(), 0, 0); + if (queue_serial_number != 0) 
queue_vars_valid = true; } else if (key.compare("reason") == 0) @@ -2677,9 +2602,11 @@ ProcessGDBRemote::SetThreadStopInfo (StringExtractor& stop_packet) exc_data, thread_dispatch_qaddr, queue_vars_valid, + associated_with_dispatch_queue, + dispatch_queue_t, queue_name, queue_kind, - queue_serial); + queue_serial_number); return eStateStopped; } @@ -3052,7 +2979,7 @@ ProcessGDBRemote::GetImageInfoAddress() // the loaded module list can also provides a link map address if (addr == LLDB_INVALID_ADDRESS) { - GDBLoadedModuleInfoList list; + LoadedModuleInfoList list; if (GetLoadedModuleList (list).Success()) addr = list.m_link_map; } @@ -4704,7 +4631,7 @@ ProcessGDBRemote::GetGDBServerRegisterInfo () } Error -ProcessGDBRemote::GetLoadedModuleList (GDBLoadedModuleInfoList & list) +ProcessGDBRemote::GetLoadedModuleList (LoadedModuleInfoList & list) { // Make sure LLDB has an XML parser it can use first if (!XMLDocument::XMLEnabled()) @@ -4748,7 +4675,7 @@ ProcessGDBRemote::GetLoadedModuleList (GDBLoadedModuleInfoList & list) root_element.ForEachChildElementWithName("library", [log, &list](const XMLNode &library) -> bool { - GDBLoadedModuleInfoList::LoadedModuleInfo module; + LoadedModuleInfoList::LoadedModuleInfo module; library.ForEachAttribute([log, &module](const llvm::StringRef &name, const llvm::StringRef &value) -> bool { @@ -4818,7 +4745,7 @@ ProcessGDBRemote::GetLoadedModuleList (GDBLoadedModuleInfoList & list) return Error(); root_element.ForEachChildElementWithName("library", [log, &list](const XMLNode &library) -> bool { - GDBLoadedModuleInfoList::LoadedModuleInfo module; + LoadedModuleInfoList::LoadedModuleInfo module; llvm::StringRef name = library.GetAttributeValue("name"); module.set_name(name.str()); @@ -4880,19 +4807,18 @@ ProcessGDBRemote::LoadModuleAtAddress (const FileSpec &file, lldb::addr_t base_a } size_t -ProcessGDBRemote::LoadModules () +ProcessGDBRemote::LoadModules (LoadedModuleInfoList &module_list) { using lldb_private::process_gdb_remote::ProcessGDBRemote; // request a list of loaded libraries from GDBServer - GDBLoadedModuleInfoList module_list; if (GetLoadedModuleList (module_list).Fail()) return 0; // get a list of all the modules ModuleList new_modules; - for (GDBLoadedModuleInfoList::LoadedModuleInfo & modInfo : module_list.m_list) + for (LoadedModuleInfoList::LoadedModuleInfo & modInfo : module_list.m_list) { std::string mod_name; lldb::addr_t mod_base; @@ -4943,6 +4869,14 @@ ProcessGDBRemote::LoadModules () } return new_modules.GetSize(); + +} + +size_t +ProcessGDBRemote::LoadModules () +{ + LoadedModuleInfoList module_list; + return LoadModules (module_list); } Error diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 5474982..b48edd8 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -27,6 +27,7 @@ #include "lldb/Core/StringList.h" #include "lldb/Core/StructuredData.h" #include "lldb/Core/ThreadSafeValue.h" +#include "lldb/Core/LoadedModuleInfoList.h" #include "lldb/Host/HostThread.h" #include "lldb/lldb-private-forward.h" #include "lldb/Utility/StringExtractor.h" @@ -245,6 +246,9 @@ public: uint32_t &update) override; size_t + LoadModules(LoadedModuleInfoList &module_list) override; + + size_t LoadModules() override; Error @@ -261,8 +265,6 @@ protected: friend class GDBRemoteCommunicationClient; friend class 
GDBRemoteRegisterContext; - class GDBLoadedModuleInfoList; - //------------------------------------------------------------------ /// Broadcaster event bits definitions. //------------------------------------------------------------------ @@ -429,6 +431,8 @@ protected: const std::vector<lldb::addr_t> &exc_data, lldb::addr_t thread_dispatch_qaddr, bool queue_vars_valid, + lldb_private::LazyBool associated_with_libdispatch_queue, + lldb::addr_t dispatch_queue_t, std::string &queue_name, lldb::QueueKind queue_kind, uint64_t queue_serial); @@ -461,7 +465,7 @@ protected: // Query remote GDBServer for a detailed loaded library list Error - GetLoadedModuleList (GDBLoadedModuleInfoList &); + GetLoadedModuleList (LoadedModuleInfoList &); lldb::ModuleSP LoadModuleAtAddress (const FileSpec &file, lldb::addr_t base_addr, bool value_is_offset); diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp index 9b410d8..a4af12c 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp @@ -41,8 +41,10 @@ ThreadGDBRemote::ThreadGDBRemote (Process &process, lldb::tid_t tid) : m_thread_name (), m_dispatch_queue_name (), m_thread_dispatch_qaddr (LLDB_INVALID_ADDRESS), - m_queue_kind(eQueueKindUnknown), - m_queue_serial(0) + m_dispatch_queue_t (LLDB_INVALID_ADDRESS), + m_queue_kind (eQueueKindUnknown), + m_queue_serial_number (LLDB_INVALID_QUEUE_ID), + m_associated_with_libdispatch_queue (eLazyBoolCalculate) { ProcessGDBRemoteLog::LogIf(GDBR_LOG_THREAD, "%p: ThreadGDBRemote::ThreadGDBRemote (pid = %i, tid = 0x%4.4x)", this, @@ -73,15 +75,19 @@ ThreadGDBRemote::ClearQueueInfo () { m_dispatch_queue_name.clear(); m_queue_kind = eQueueKindUnknown; - m_queue_serial = 0; + m_queue_serial_number = 0; + m_dispatch_queue_t = LLDB_INVALID_ADDRESS; + m_associated_with_libdispatch_queue = eLazyBoolCalculate; } void -ThreadGDBRemote::SetQueueInfo (std::string &&queue_name, QueueKind queue_kind, uint64_t queue_serial) +ThreadGDBRemote::SetQueueInfo (std::string &&queue_name, QueueKind queue_kind, uint64_t queue_serial, addr_t dispatch_queue_t, LazyBool associated_with_libdispatch_queue) { m_dispatch_queue_name = queue_name; m_queue_kind = queue_kind; - m_queue_serial = queue_serial; + m_queue_serial_number = queue_serial; + m_dispatch_queue_t = dispatch_queue_t; + m_associated_with_libdispatch_queue = associated_with_libdispatch_queue; } @@ -100,7 +106,10 @@ ThreadGDBRemote::GetQueueName () } // Always re-fetch the dispatch queue name since it can change - if (m_thread_dispatch_qaddr != 0 || m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) + if (m_associated_with_libdispatch_queue == eLazyBoolNo) + return nullptr; + + if (m_thread_dispatch_qaddr != 0 && m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) { ProcessSP process_sp (GetProcess()); if (process_sp) @@ -118,6 +127,35 @@ ThreadGDBRemote::GetQueueName () return NULL; } +QueueKind +ThreadGDBRemote::GetQueueKind () +{ + // If our cached queue info is valid, then someone called ThreadGDBRemote::SetQueueInfo(...) + // with valid information that was gleaned from the stop reply packet. 
In this case we trust + // that the info is valid in m_dispatch_queue_name without refetching it + if (CachedQueueInfoIsValid()) + { + return m_queue_kind; + } + + if (m_associated_with_libdispatch_queue == eLazyBoolNo) + return eQueueKindUnknown; + + if (m_thread_dispatch_qaddr != 0 && m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) + { + ProcessSP process_sp (GetProcess()); + if (process_sp) + { + SystemRuntime *runtime = process_sp->GetSystemRuntime (); + if (runtime) + m_queue_kind = runtime->GetQueueKind (m_thread_dispatch_qaddr); + return m_queue_kind; + } + } + return eQueueKindUnknown; +} + + queue_id_t ThreadGDBRemote::GetQueueID () { @@ -125,9 +163,12 @@ ThreadGDBRemote::GetQueueID () // with valid information that was gleaned from the stop reply packet. In this case we trust // that the info is valid in m_dispatch_queue_name without refetching it if (CachedQueueInfoIsValid()) - return m_queue_serial; + return m_queue_serial_number; + + if (m_associated_with_libdispatch_queue == eLazyBoolNo) + return LLDB_INVALID_QUEUE_ID; - if (m_thread_dispatch_qaddr != 0 || m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) + if (m_thread_dispatch_qaddr != 0 && m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) { ProcessSP process_sp (GetProcess()); if (process_sp) @@ -161,20 +202,54 @@ ThreadGDBRemote::GetQueue () addr_t ThreadGDBRemote::GetQueueLibdispatchQueueAddress () { - addr_t dispatch_queue_t_addr = LLDB_INVALID_ADDRESS; - if (m_thread_dispatch_qaddr != 0 || m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) + if (m_dispatch_queue_t == LLDB_INVALID_ADDRESS) { - ProcessSP process_sp (GetProcess()); - if (process_sp) + if (m_thread_dispatch_qaddr != 0 && m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS) { - SystemRuntime *runtime = process_sp->GetSystemRuntime (); - if (runtime) + ProcessSP process_sp (GetProcess()); + if (process_sp) { - dispatch_queue_t_addr = runtime->GetLibdispatchQueueAddressFromThreadQAddress (m_thread_dispatch_qaddr); + SystemRuntime *runtime = process_sp->GetSystemRuntime (); + if (runtime) + { + m_dispatch_queue_t = runtime->GetLibdispatchQueueAddressFromThreadQAddress (m_thread_dispatch_qaddr); + } } } } - return dispatch_queue_t_addr; + return m_dispatch_queue_t; +} + +void +ThreadGDBRemote::SetQueueLibdispatchQueueAddress (lldb::addr_t dispatch_queue_t) +{ + m_dispatch_queue_t = dispatch_queue_t; +} + +bool +ThreadGDBRemote::ThreadHasQueueInformation () const +{ + if (m_thread_dispatch_qaddr != 0 + && m_thread_dispatch_qaddr != LLDB_INVALID_ADDRESS + && m_dispatch_queue_t != LLDB_INVALID_ADDRESS + && m_queue_kind != eQueueKindUnknown + && m_queue_serial_number != 0) + { + return true; + } + return false; +} + +LazyBool +ThreadGDBRemote::GetAssociatedWithLibdispatchQueue () +{ + return m_associated_with_libdispatch_queue; +} + +void +ThreadGDBRemote::SetAssociatedWithLibdispatchQueue (LazyBool associated_with_libdispatch_queue) +{ + m_associated_with_libdispatch_queue = associated_with_libdispatch_queue; } StructuredData::ObjectSP diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h index 24693ba..d7619f4 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h @@ -46,6 +46,9 @@ public: const char * GetQueueName () override; + lldb::QueueKind + GetQueueKind () override; + lldb::queue_id_t GetQueueID () override; @@ -55,6 +58,12 @@ public: 
lldb::addr_t GetQueueLibdispatchQueueAddress () override; + void + SetQueueLibdispatchQueueAddress (lldb::addr_t dispatch_queue_t) override; + + bool + ThreadHasQueueInformation () const override; + lldb::RegisterContextSP GetRegisterContext () override; @@ -98,7 +107,13 @@ public: ClearQueueInfo (); void - SetQueueInfo (std::string &&queue_name, lldb::QueueKind queue_kind, uint64_t queue_serial); + SetQueueInfo (std::string &&queue_name, lldb::QueueKind queue_kind, uint64_t queue_serial, lldb::addr_t dispatch_queue_t, lldb_private::LazyBool associated_with_libdispatch_queue); + + lldb_private::LazyBool + GetAssociatedWithLibdispatchQueue () override; + + void + SetAssociatedWithLibdispatchQueue (lldb_private::LazyBool associated_with_libdispatch_queue) override; StructuredData::ObjectSP FetchThreadExtendedInfo () override; @@ -109,8 +124,10 @@ protected: std::string m_thread_name; std::string m_dispatch_queue_name; lldb::addr_t m_thread_dispatch_qaddr; + lldb::addr_t m_dispatch_queue_t; lldb::QueueKind m_queue_kind; // Queue info from stop reply/stop info for thread - uint64_t m_queue_serial; // Queue info from stop reply/stop info for thread + uint64_t m_queue_serial_number; // Queue info from stop reply/stop info for thread + lldb_private::LazyBool m_associated_with_libdispatch_queue; bool PrivateSetRegisterValue (uint32_t reg, diff --git a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp index 3107677..23bacc9 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp @@ -77,6 +77,10 @@ PythonObject::GetObjectType() const return PyObjectType::Dictionary; if (PythonString::Check(m_py_obj)) return PyObjectType::String; +#if PY_MAJOR_VERSION >= 3 + if (PythonBytes::Check(m_py_obj)) + return PyObjectType::Bytes; +#endif if (PythonInteger::Check(m_py_obj)) return PyObjectType::Integer; if (PythonFile::Check(m_py_obj)) @@ -210,6 +214,8 @@ PythonObject::CreateStructuredObject() const return PythonList(PyRefType::Borrowed, m_py_obj).CreateStructuredArray(); case PyObjectType::String: return PythonString(PyRefType::Borrowed, m_py_obj).CreateStructuredString(); + case PyObjectType::Bytes: + return PythonBytes(PyRefType::Borrowed, m_py_obj).CreateStructuredString(); case PyObjectType::None: return StructuredData::ObjectSP(); default: @@ -220,6 +226,104 @@ PythonObject::CreateStructuredObject() const //---------------------------------------------------------------------- // PythonString //---------------------------------------------------------------------- +PythonBytes::PythonBytes() : PythonObject() +{ +} + +PythonBytes::PythonBytes(llvm::ArrayRef<uint8_t> bytes) : PythonObject() +{ + SetBytes(bytes); +} + +PythonBytes::PythonBytes(const uint8_t *bytes, size_t length) : PythonObject() +{ + SetBytes(llvm::ArrayRef<uint8_t>(bytes, length)); +} + +PythonBytes::PythonBytes(PyRefType type, PyObject *py_obj) : PythonObject() +{ + Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a string +} + +PythonBytes::PythonBytes(const PythonBytes &object) : PythonObject(object) +{ +} + +PythonBytes::~PythonBytes() +{ +} + +bool +PythonBytes::Check(PyObject *py_obj) +{ + if (!py_obj) + return false; + if (PyBytes_Check(py_obj)) + return true; + return false; +} + +void +PythonBytes::Reset(PyRefType type, PyObject *py_obj) +{ + // Grab the desired 
reference type so that if we end up rejecting + // `py_obj` it still gets decremented if necessary. + PythonObject result(type, py_obj); + + if (!PythonBytes::Check(py_obj)) + { + PythonObject::Reset(); + return; + } + + // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls + // back into the virtual implementation. + PythonObject::Reset(PyRefType::Borrowed, result.get()); +} + +llvm::ArrayRef<uint8_t> +PythonBytes::GetBytes() const +{ + if (!IsValid()) + return llvm::ArrayRef<uint8_t>(); + + Py_ssize_t size; + char *c; + + PyBytes_AsStringAndSize(m_py_obj, &c, &size); + return llvm::ArrayRef<uint8_t>(reinterpret_cast<uint8_t *>(c), size); +} + +size_t +PythonBytes::GetSize() const +{ + if (!IsValid()) + return 0; + return PyBytes_Size(m_py_obj); +} + +void +PythonBytes::SetBytes(llvm::ArrayRef<uint8_t> bytes) +{ + const char *data = reinterpret_cast<const char *>(bytes.data()); + PyObject *py_bytes = PyBytes_FromStringAndSize(data, bytes.size()); + PythonObject::Reset(PyRefType::Owned, py_bytes); +} + +StructuredData::StringSP +PythonBytes::CreateStructuredString() const +{ + StructuredData::StringSP result(new StructuredData::String); + Py_ssize_t size; + char *c; + PyBytes_AsStringAndSize(m_py_obj, &c, &size); + result->SetValue(std::string(c, size)); + return result; +} + +//---------------------------------------------------------------------- +// PythonString +//---------------------------------------------------------------------- PythonString::PythonString(PyRefType type, PyObject *py_obj) : PythonObject() diff --git a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h index c9d17c0..06264b6 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h +++ b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h @@ -23,8 +23,11 @@ #include "lldb/Host/File.h" #include "lldb/Interpreter/OptionValue.h" +#include "llvm/ADT/ArrayRef.h" + namespace lldb_private { +class PythonBytes; class PythonString; class PythonList; class PythonDictionary; @@ -71,6 +74,7 @@ enum class PyObjectType Dictionary, List, String, + Bytes, Module, Callable, Tuple, @@ -256,6 +260,39 @@ protected: PyObject* m_py_obj; }; +class PythonBytes : public PythonObject +{ +public: + PythonBytes(); + explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes); + PythonBytes(const uint8_t *bytes, size_t length); + PythonBytes(PyRefType type, PyObject *o); + PythonBytes(const PythonBytes &object); + + ~PythonBytes() override; + + static bool + Check(PyObject *py_obj); + + // Bring in the no-argument base class version + using PythonObject::Reset; + + void + Reset(PyRefType type, PyObject *py_obj) override; + + llvm::ArrayRef<uint8_t> + GetBytes() const; + + size_t + GetSize() const; + + void + SetBytes(llvm::ArrayRef<uint8_t> stringbytes); + + StructuredData::StringSP + CreateStructuredString() const; +}; + class PythonString : public PythonObject { public: diff --git a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index b1dd34b..19ad86d 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -1692,10 +1692,10 @@ 
ScriptInterpreterPython::OSPlugin_RegisterContextData(StructuredData::ObjectSP o PyErr_Clear(); } - assert(PythonString::Check(py_return.get()) && "get_register_data returned unknown object type!"); + assert(PythonBytes::Check(py_return.get()) && "get_register_data returned unknown object type!"); - PythonString result_string(PyRefType::Borrowed, py_return.get()); - return result_string.CreateStructuredString(); + PythonBytes result(PyRefType::Borrowed, py_return.get()); + return result.CreateStructuredString(); } StructuredData::DictionarySP diff --git a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h index a5484db..3df07d5 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h +++ b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DIERef.h @@ -33,6 +33,18 @@ struct DIERef lldb::user_id_t GetUID() const; + bool + operator< (const DIERef &ref) const + { + return die_offset < ref.die_offset; + } + + bool + operator< (const DIERef &ref) + { + return die_offset < ref.die_offset; + } + dw_offset_t cu_offset; dw_offset_t die_offset; }; diff --git a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 68a0285..74b54d6 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2928,7 +2928,7 @@ DWARFASTParserClang::ParseChildMembers (const SymbolContext& sc, if (member_byte_offset >= parent_byte_size) { - if (member_array_size != 1) + if (member_array_size != 1 && (member_array_size != 0 || member_byte_offset > parent_byte_size)) { module_sp->ReportError ("0x%8.8" PRIx64 ": DW_TAG_member '%s' refers to type 0x%8.8" PRIx64 " which extends beyond the bounds of 0x%8.8" PRIx64, die.GetID(), diff --git a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 0ed4d05..2088864 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1065,12 +1065,17 @@ SymbolFileDWARF::ParseCompileUnitSupportFiles (const SymbolContext& sc, FileSpec const char * cu_comp_dir = resolveCompDir(cu_die.GetAttributeValueAsString(DW_AT_comp_dir, nullptr)); const dw_offset_t stmt_list = cu_die.GetAttributeValueAsUnsigned(DW_AT_stmt_list, DW_INVALID_OFFSET); - - // All file indexes in DWARF are one based and a file of index zero is - // supposed to be the compile unit itself. - support_files.Append (*sc.comp_unit); - - return DWARFDebugLine::ParseSupportFiles(sc.comp_unit->GetModule(), get_debug_line_data(), cu_comp_dir, stmt_list, support_files); + if (stmt_list != DW_INVALID_OFFSET) + { + // All file indexes in DWARF are one based and a file of index zero is + // supposed to be the compile unit itself. 
+ support_files.Append (*sc.comp_unit); + return DWARFDebugLine::ParseSupportFiles(sc.comp_unit->GetModule(), + get_debug_line_data(), + cu_comp_dir, + stmt_list, + support_files); + } } } return false; @@ -2927,6 +2932,40 @@ SymbolFileDWARF::FindFunctions(const RegularExpression& regex, bool include_inli return sc_list.GetSize() - original_size; } +void +SymbolFileDWARF::GetMangledNamesForFunction (const std::string &scope_qualified_name, + std::vector<ConstString> &mangled_names) +{ + DWARFDebugInfo* info = DebugInfo(); + uint32_t num_comp_units = 0; + if (info) + num_comp_units = info->GetNumCompileUnits(); + + for (uint32_t i = 0; i < num_comp_units; i++) + { + DWARFCompileUnit *cu = info->GetCompileUnitAtIndex(i); + if (cu == nullptr) + continue; + + SymbolFileDWARFDwo *dwo = cu->GetDwoSymbolFile(); + if (dwo) + dwo->GetMangledNamesForFunction(scope_qualified_name, mangled_names); + } + + NameToOffsetMap::iterator iter = m_function_scope_qualified_name_map.find(scope_qualified_name); + if (iter == m_function_scope_qualified_name_map.end()) + return; + + DIERefSetSP set_sp = (*iter).second; + std::set<DIERef>::iterator set_iter; + for (set_iter = set_sp->begin(); set_iter != set_sp->end(); set_iter++) + { + DWARFDIE die = DebugInfo()->GetDIE (*set_iter); + mangled_names.push_back(ConstString(die.GetMangledName())); + } +} + + uint32_t SymbolFileDWARF::FindTypes (const SymbolContext& sc, const ConstString &name, @@ -3751,6 +3790,24 @@ SymbolFileDWARF::ParseType (const SymbolContext& sc, const DWARFDIE &die, bool * TypeList* type_list = GetTypeList(); if (type_list) type_list->Insert(type_sp); + + if (die.Tag() == DW_TAG_subprogram) + { + DIERef die_ref = die.GetDIERef(); + std::string scope_qualified_name(GetDeclContextForUID(die.GetID()).GetScopeQualifiedName().AsCString("")); + if (scope_qualified_name.size()) + { + NameToOffsetMap::iterator iter = m_function_scope_qualified_name_map.find(scope_qualified_name); + if (iter != m_function_scope_qualified_name_map.end()) + (*iter).second->insert(die_ref); + else + { + DIERefSetSP new_set(new std::set<DIERef>); + new_set->insert(die_ref); + m_function_scope_qualified_name_map.emplace(std::make_pair(scope_qualified_name, new_set)); + } + } + } } } } diff --git a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index c2e78a4..be09759 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -208,6 +208,10 @@ public: bool append, lldb_private::SymbolContextList& sc_list) override; + void + GetMangledNamesForFunction (const std::string &scope_qualified_name, + std::vector<lldb_private::ConstString> &mangled_names) override; + uint32_t FindTypes (const lldb_private::SymbolContext& sc, const lldb_private::ConstString &name, @@ -577,6 +581,9 @@ protected: m_fetched_external_modules:1; lldb_private::LazyBool m_supports_DW_AT_APPLE_objc_complete_type; + typedef std::shared_ptr<std::set<DIERef> > DIERefSetSP; + typedef std::unordered_map<std::string, DIERefSetSP> NameToOffsetMap; + NameToOffsetMap m_function_scope_qualified_name_map; std::unique_ptr<DWARFDebugRanges> m_ranges; UniqueDWARFASTTypeMap m_unique_ast_type_map; DIEToTypePtr m_die_to_type; diff --git a/contrib/llvm/tools/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp b/contrib/llvm/tools/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp index 
7e4c696..76f0b48 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp @@ -155,6 +155,7 @@ private: bool mov_reg_to_local_stack_frame_p (int& regno, int& fp_offset); bool ret_pattern_p (); bool pop_rbp_pattern_p (); + bool leave_pattern_p (); bool call_next_insn_pattern_p(); uint32_t extract_4 (uint8_t *b); bool machine_regno_to_lldb_regno (int machine_regno, uint32_t& lldb_regno); @@ -492,6 +493,14 @@ AssemblyParse_x86::pop_rbp_pattern_p () return (*p == 0x5d); } +// leave [0xc9] +bool +AssemblyParse_x86::leave_pattern_p () +{ + uint8_t *p = m_cur_insn_bytes; + return (*p == 0xc9); +} + // call $0 [0xe8 0x0 0x0 0x0 0x0] bool AssemblyParse_x86::call_next_insn_pattern_p () @@ -780,8 +789,7 @@ AssemblyParse_x86::get_non_call_site_unwind_plan (UnwindPlan &unwind_plan) if (machine_regno == (int)m_machine_fp_regnum) { - row->GetCFAValue().SetIsRegisterPlusOffset (m_lldb_sp_regnum, - row->GetCFAValue().GetOffset()); + row->GetCFAValue().SetIsRegisterPlusOffset (m_lldb_sp_regnum, row->GetCFAValue().GetOffset()); } in_epilogue = true; @@ -792,12 +800,35 @@ AssemblyParse_x86::get_non_call_site_unwind_plan (UnwindPlan &unwind_plan) // we need to add a new row of instructions. if (row->GetCFAValue().GetRegisterNumber() == m_lldb_sp_regnum) { - row->GetCFAValue().SetIsRegisterPlusOffset(m_lldb_sp_regnum, - current_sp_bytes_offset_from_cfa); + row->GetCFAValue().SetIsRegisterPlusOffset (m_lldb_sp_regnum, current_sp_bytes_offset_from_cfa); row_updated = true; } } + // The LEAVE instruction moves the value from rbp into rsp and pops + // a value off the stack into rbp (restoring the caller's rbp value). + // It is the opposite of ENTER, or 'push rbp, mov rsp rbp'. + else if (leave_pattern_p ()) + { + // We're going to copy the value in rbp into rsp, so re-set the sp offset + // based on the CFAValue. Also, adjust it to recognize that we're popping + // the saved rbp value off the stack. + current_sp_bytes_offset_from_cfa = row->GetCFAValue().GetOffset(); + current_sp_bytes_offset_from_cfa -= m_wordsize; + row->GetCFAValue().SetOffset (current_sp_bytes_offset_from_cfa); + + // rbp is restored to the caller's value + saved_registers[m_machine_fp_regnum] = false; + row->RemoveRegisterInfo (m_lldb_fp_regnum); + + // cfa is now in terms of rsp again. 
+ row->GetCFAValue().SetIsRegisterPlusOffset (m_lldb_sp_regnum, row->GetCFAValue().GetOffset()); + row->GetCFAValue().SetOffset (current_sp_bytes_offset_from_cfa); + + in_epilogue = true; + row_updated = true; + } + else if (mov_reg_to_local_stack_frame_p (machine_regno, stack_offset) && nonvolatile_reg_p (machine_regno) && machine_regno_to_lldb_regno (machine_regno, lldb_regno) @@ -1137,15 +1168,15 @@ AssemblyParse_x86::augment_unwind_plan_from_call_site (AddressRange& func, Unwin // The only case we care about is epilogue: // [0x5d] pop %rbp/%ebp // => [0xc3] ret - if (pop_rbp_pattern_p ()) + if (pop_rbp_pattern_p () || leave_pattern_p ()) { if (target->ReadMemory (m_cur_insn, prefer_file_cache, m_cur_insn_bytes, 1, error) != static_cast<size_t>(-1) && ret_pattern_p ()) { row->SetOffset (offset); - row->GetCFAValue().SetIsRegisterPlusOffset ( - first_row->GetCFAValue().GetRegisterNumber(), m_wordsize); + row->GetCFAValue().SetIsRegisterPlusOffset (first_row->GetCFAValue().GetRegisterNumber(), + m_wordsize); UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); unwind_plan.InsertRow (new_row); diff --git a/contrib/llvm/tools/lldb/source/Symbol/ClangASTContext.cpp b/contrib/llvm/tools/lldb/source/Symbol/ClangASTContext.cpp index 8b11c23..621bd16 100644 --- a/contrib/llvm/tools/lldb/source/Symbol/ClangASTContext.cpp +++ b/contrib/llvm/tools/lldb/source/Symbol/ClangASTContext.cpp @@ -4158,6 +4158,7 @@ ClangASTContext::GetTypeClass (lldb::opaque_compiler_type_t type) case clang::Type::Decltype: break; case clang::Type::TemplateSpecialization: break; case clang::Type::Atomic: break; + case clang::Type::Pipe: break; // pointer type decayed from an array or function type. case clang::Type::Decayed: break; @@ -4891,6 +4892,7 @@ ClangASTContext::GetEncoding (lldb::opaque_compiler_type_t type, uint64_t &count case clang::Type::TemplateSpecialization: case clang::Type::Atomic: case clang::Type::Adjusted: + case clang::Type::Pipe: break; // pointer type decayed from an array or function type. @@ -5008,6 +5010,7 @@ ClangASTContext::GetFormat (lldb::opaque_compiler_type_t type) case clang::Type::TemplateSpecialization: case clang::Type::Atomic: case clang::Type::Adjusted: + case clang::Type::Pipe: break; // pointer type decayed from an array or function type. 
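Returning to the UnwindAssembly-x86 change above: the new leave_pattern_p predicate, together with the existing pop_rbp_pattern_p and ret_pattern_p, amounts to matching one-byte opcodes. The sketch below is illustrative only, not patch code; IsSimpleEpilogue is a hypothetical helper that folds the three predicates into a single check.

#include <cstddef>
#include <cstdint>

// One-byte x86 opcodes involved: 0x5d = pop %rbp, 0xc9 = leave, 0xc3 = ret.
// Returns true if the two bytes at insn form an epilogue of the shape
// "pop %rbp; ret" or "leave; ret".
bool
IsSimpleEpilogue(const std::uint8_t *insn, std::size_t len)
{
    if (len < 2)
        return false;
    const bool restores_rbp = insn[0] == 0x5d || insn[0] == 0xc9;
    return restores_rbp && insn[1] == 0xc3;
}

This is the same shape augment_unwind_plan_from_call_site looks for, except that the real code reads the two instructions from target memory one at a time.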
@@ -9969,6 +9972,18 @@ ClangASTContext::DeclContextGetName (void *opaque_decl_ctx) return ConstString(); } +ConstString +ClangASTContext::DeclContextGetScopeQualifiedName (void *opaque_decl_ctx) +{ + if (opaque_decl_ctx) + { + clang::NamedDecl *named_decl = llvm::dyn_cast<clang::NamedDecl>((clang::DeclContext *)opaque_decl_ctx); + if (named_decl) + return ConstString(llvm::StringRef(named_decl->getQualifiedNameAsString())); + } + return ConstString(); +} + bool ClangASTContext::DeclContextIsClassMethod (void *opaque_decl_ctx, lldb::LanguageType *language_ptr, diff --git a/contrib/llvm/tools/lldb/source/Symbol/CompilerDeclContext.cpp b/contrib/llvm/tools/lldb/source/Symbol/CompilerDeclContext.cpp index e44cee6..8bee1b4 100644 --- a/contrib/llvm/tools/lldb/source/Symbol/CompilerDeclContext.cpp +++ b/contrib/llvm/tools/lldb/source/Symbol/CompilerDeclContext.cpp @@ -38,6 +38,15 @@ CompilerDeclContext::GetName () const return ConstString(); } +ConstString +CompilerDeclContext::GetScopeQualifiedName () const +{ + if (IsValid()) + return m_type_system->DeclContextGetScopeQualifiedName(m_opaque_decl_ctx); + else + return ConstString(); +} + bool CompilerDeclContext::IsStructUnionOrClass () const { diff --git a/contrib/llvm/tools/lldb/source/Symbol/LineTable.cpp b/contrib/llvm/tools/lldb/source/Symbol/LineTable.cpp index 01c1718..f9a42a7 100644 --- a/contrib/llvm/tools/lldb/source/Symbol/LineTable.cpp +++ b/contrib/llvm/tools/lldb/source/Symbol/LineTable.cpp @@ -143,6 +143,13 @@ LineTable::InsertSequence (LineSequence* sequence) entry_collection::iterator end_pos = m_entries.end(); LineTable::Entry::LessThanBinaryPredicate less_than_bp(this); entry_collection::iterator pos = upper_bound(begin_pos, end_pos, entry, less_than_bp); + + // We should never insert a sequence in the middle of another sequence + if (pos != begin_pos) { + while (pos < end_pos && !((pos - 1)->is_terminal_entry)) + pos++; + } + #ifdef LLDB_CONFIGURATION_DEBUG // If we aren't inserting at the beginning, the previous entry should // terminate a sequence. 
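The LineTable::InsertSequence fix above can be isolated into a small, self-contained rule. The sketch below is illustrative only; ToyEntry and FixupInsertPos are hypothetical reductions of LineTable::Entry and the loop added by the patch.

#include <vector>

// Hypothetical reduction of LineTable::Entry to the one flag the fix needs.
struct ToyEntry
{
    unsigned file_addr;
    bool is_terminal_entry; // true for the entry that closes a sequence
};

// Mirrors the added loop: after upper_bound picks an insertion point, slide
// it forward until the entry just before it terminates a sequence, so a new
// sequence is never inserted into the middle of an existing one.
std::vector<ToyEntry>::iterator
FixupInsertPos(std::vector<ToyEntry> &entries, std::vector<ToyEntry>::iterator pos)
{
    if (pos != entries.begin())
    {
        while (pos != entries.end() && !(pos - 1)->is_terminal_entry)
            ++pos;
    }
    return pos;
}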
diff --git a/contrib/llvm/tools/lldb/source/Symbol/SymbolFile.cpp b/contrib/llvm/tools/lldb/source/Symbol/SymbolFile.cpp index 51e3504..82bbceb 100644 --- a/contrib/llvm/tools/lldb/source/Symbol/SymbolFile.cpp +++ b/contrib/llvm/tools/lldb/source/Symbol/SymbolFile.cpp @@ -134,6 +134,12 @@ SymbolFile::FindFunctions (const RegularExpression& regex, bool include_inlines, return 0; } +void +SymbolFile::GetMangledNamesForFunction(const std::string &scope_qualified_name, std::vector<ConstString> &mangled_names) +{ + return; +} + uint32_t SymbolFile::FindTypes (const SymbolContext& sc, const ConstString &name, const CompilerDeclContext *parent_decl_ctx, bool append, uint32_t max_matches, TypeMap& types) { diff --git a/contrib/llvm/tools/lldb/source/Target/Process.cpp b/contrib/llvm/tools/lldb/source/Target/Process.cpp index 311c695..e4fe419 100644 --- a/contrib/llvm/tools/lldb/source/Target/Process.cpp +++ b/contrib/llvm/tools/lldb/source/Target/Process.cpp @@ -6515,3 +6515,65 @@ Process::ResetImageToken(size_t token) if (token < m_image_tokens.size()) m_image_tokens[token] = LLDB_INVALID_IMAGE_TOKEN; } + +Address +Process::AdvanceAddressToNextBranchInstruction (Address default_stop_addr, AddressRange range_bounds) +{ + Target &target = GetTarget(); + DisassemblerSP disassembler_sp; + InstructionList *insn_list = NULL; + + Address retval = default_stop_addr; + + if (target.GetUseFastStepping() == false) + return retval; + if (default_stop_addr.IsValid() == false) + return retval; + + ExecutionContext exe_ctx (this); + const char *plugin_name = nullptr; + const char *flavor = nullptr; + const bool prefer_file_cache = true; + disassembler_sp = Disassembler::DisassembleRange(target.GetArchitecture(), + plugin_name, + flavor, + exe_ctx, + range_bounds, + prefer_file_cache); + if (disassembler_sp.get()) + insn_list = &disassembler_sp->GetInstructionList(); + + if (insn_list == NULL) + { + return retval; + } + + size_t insn_offset = insn_list->GetIndexOfInstructionAtAddress (default_stop_addr); + if (insn_offset == UINT32_MAX) + { + return retval; + } + + uint32_t branch_index = insn_list->GetIndexOfNextBranchInstruction (insn_offset, target); + if (branch_index == UINT32_MAX) + { + return retval; + } + + if (branch_index > insn_offset) + { + Address next_branch_insn_address = insn_list->GetInstructionAtIndex (branch_index)->GetAddress(); + if (next_branch_insn_address.IsValid() && range_bounds.ContainsFileAddress (next_branch_insn_address)) + { + retval = next_branch_insn_address; + } + } + + if (disassembler_sp.get()) + { + // FIXME: The DisassemblerLLVMC has a reference cycle and won't go away if it has any active instructions. 
+ disassembler_sp->GetInstructionList().Clear(); + } + + return retval; +} diff --git a/contrib/llvm/tools/lldb/source/Target/Thread.cpp b/contrib/llvm/tools/lldb/source/Target/Thread.cpp index 9f9da97..551e480 100644 --- a/contrib/llvm/tools/lldb/source/Target/Thread.cpp +++ b/contrib/llvm/tools/lldb/source/Target/Thread.cpp @@ -1591,7 +1591,7 @@ Thread::QueueThreadPlanForStepOut(bool abort_other_plans, Vote stop_vote, Vote run_vote, uint32_t frame_idx, - LazyBool step_out_avoids_code_withoug_debug_info) + LazyBool step_out_avoids_code_without_debug_info) { ThreadPlanSP thread_plan_sp (new ThreadPlanStepOut (*this, addr_context, @@ -1600,7 +1600,7 @@ Thread::QueueThreadPlanForStepOut(bool abort_other_plans, stop_vote, run_vote, frame_idx, - step_out_avoids_code_withoug_debug_info)); + step_out_avoids_code_without_debug_info)); if (thread_plan_sp->ValidatePlan(nullptr)) { @@ -1620,7 +1620,8 @@ Thread::QueueThreadPlanForStepOutNoShouldStop(bool abort_other_plans, bool stop_other_threads, Vote stop_vote, Vote run_vote, - uint32_t frame_idx) + uint32_t frame_idx, + bool continue_to_next_branch) { ThreadPlanSP thread_plan_sp(new ThreadPlanStepOut (*this, addr_context, @@ -1629,7 +1630,8 @@ Thread::QueueThreadPlanForStepOutNoShouldStop(bool abort_other_plans, stop_vote, run_vote, frame_idx, - eLazyBoolNo)); + eLazyBoolNo, + continue_to_next_branch)); ThreadPlanStepOut *new_plan = static_cast<ThreadPlanStepOut *>(thread_plan_sp.get()); new_plan->ClearShouldStopHereCallbacks(); diff --git a/contrib/llvm/tools/lldb/source/Target/ThreadPlanShouldStopHere.cpp b/contrib/llvm/tools/lldb/source/Target/ThreadPlanShouldStopHere.cpp index 88f8db2..55aaaf0 100644 --- a/contrib/llvm/tools/lldb/source/Target/ThreadPlanShouldStopHere.cpp +++ b/contrib/llvm/tools/lldb/source/Target/ThreadPlanShouldStopHere.cpp @@ -135,7 +135,8 @@ ThreadPlanShouldStopHere::DefaultStepFromHereCallback (ThreadPlan *current_plan, stop_others, eVoteNo, eVoteNoOpinion, - frame_index); + frame_index, + true); return return_plan_sp; } diff --git a/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOut.cpp b/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOut.cpp index 92403cb..82b823b 100644 --- a/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOut.cpp +++ b/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOut.cpp @@ -18,6 +18,7 @@ #include "lldb/Core/ValueObjectConstResult.h" #include "lldb/Symbol/Block.h" #include "lldb/Symbol/Function.h" +#include "lldb/Symbol/Symbol.h" #include "lldb/Symbol/Type.h" #include "lldb/Target/ABI.h" #include "lldb/Target/Process.h" @@ -44,7 +45,8 @@ ThreadPlanStepOut::ThreadPlanStepOut Vote stop_vote, Vote run_vote, uint32_t frame_idx, - LazyBool step_out_avoids_code_without_debug_info + LazyBool step_out_avoids_code_without_debug_info, + bool continue_to_next_branch ) : ThreadPlan (ThreadPlan::eKindStepOut, "Step out", thread, stop_vote, run_vote), ThreadPlanShouldStopHere (this), @@ -86,7 +88,8 @@ ThreadPlanStepOut::ThreadPlanStepOut eVoteNoOpinion, eVoteNoOpinion, frame_idx - 1, - eLazyBoolNo)); + eLazyBoolNo, + continue_to_next_branch)); static_cast<ThreadPlanStepOut *>(m_step_out_to_inline_plan_sp.get())->SetShouldStopHereCallbacks(nullptr, nullptr); m_step_out_to_inline_plan_sp->SetPrivate(true); } @@ -101,7 +104,27 @@ ThreadPlanStepOut::ThreadPlanStepOut // Find the return address and set a breakpoint there: // FIXME - can we do this more securely if we know first_insn? 
- m_return_addr = return_frame_sp->GetFrameCodeAddress().GetLoadAddress(&m_thread.GetProcess()->GetTarget()); + Address return_address (return_frame_sp->GetFrameCodeAddress()); + if (continue_to_next_branch) + { + SymbolContext return_address_sc; + AddressRange range; + Address return_address_decr_pc = return_address; + if (return_address_decr_pc.GetOffset() > 0) + return_address_decr_pc.Slide (-1); + + return_address_decr_pc.CalculateSymbolContext (&return_address_sc, lldb::eSymbolContextLineEntry); + if (return_address_sc.line_entry.IsValid()) + { + range = return_address_sc.line_entry.GetSameLineContiguousAddressRange(); + if (range.GetByteSize() > 0) + { + return_address = m_thread.GetProcess()->AdvanceAddressToNextBranchInstruction (return_address, + range); + } + } + } + m_return_addr = return_address.GetLoadAddress(&m_thread.GetProcess()->GetTarget()); if (m_return_addr == LLDB_INVALID_ADDRESS) return; diff --git a/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOverRange.cpp b/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOverRange.cpp index 08655be..2e731a8 100644 --- a/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOverRange.cpp +++ b/contrib/llvm/tools/lldb/source/Target/ThreadPlanStepOverRange.cpp @@ -185,7 +185,8 @@ ThreadPlanStepOverRange::ShouldStop (Event *event_ptr) stop_others, eVoteNo, eVoteNoOpinion, - 0); + 0, + true); break; } else diff --git a/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp b/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp index 6c537d4..0b75e20 100644 --- a/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp +++ b/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp @@ -1,244 +1,69 @@ -#include "llvm/Config/config.h" -#include "../RPCChannel.h" -#include "../RemoteTarget.h" -#include "../RemoteTargetMessage.h" -#include "llvm/Support/Memory.h" -#include <assert.h> -#include <map> -#include <stdint.h> -#include <string> -#include <vector> +#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h" +#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Process.h" +#include <sstream> -using namespace llvm; +#include "../RemoteJITUtils.h" -class LLIChildTarget { -public: - void initialize(); - LLIMessageType waitForIncomingMessage(); - void handleMessage(LLIMessageType messageType); - RemoteTarget *RT; - RPCChannel RPC; +using namespace llvm; +using namespace llvm::orc; +using namespace llvm::sys; -private: - // Incoming message handlers - void handleAllocateSpace(); - void handleLoadSection(bool IsCode); - void handleExecute(); +#ifdef __x86_64__ +typedef OrcX86_64 HostOrcArch; +#else +typedef OrcGenericArchitecture HostOrcArch; +#endif - // Outgoing message handlers - void sendChildActive(); - void sendAllocationResult(uint64_t Addr); - void sendLoadStatus(uint32_t Status); - void sendExecutionComplete(int Result); +int main(int argc, char *argv[]) { - // OS-specific functions - void initializeConnection(); - int WriteBytes(const void *Data, size_t Size) { - return RPC.WriteBytes(Data, Size) ? Size : -1; + if (argc != 3) { + errs() << "Usage: " << argv[0] << " <input fd> <output fd>\n"; + return 1; } - int ReadBytes(void *Data, size_t Size) { - return RPC.ReadBytes(Data, Size) ? 
Size : -1; - } - - // Communication handles (OS-specific) - void *ConnectionData; -}; - -int main() { - LLIChildTarget ThisChild; - ThisChild.RT = new RemoteTarget(); - ThisChild.initialize(); - LLIMessageType MsgType; - do { - MsgType = ThisChild.waitForIncomingMessage(); - ThisChild.handleMessage(MsgType); - } while (MsgType != LLI_Terminate && - MsgType != LLI_Error); - delete ThisChild.RT; - return 0; -} -// Public methods -void LLIChildTarget::initialize() { - RPC.createClient(); - sendChildActive(); -} + int InFD; + int OutFD; + { + std::istringstream InFDStream(argv[1]), OutFDStream(argv[2]); + InFDStream >> InFD; + OutFDStream >> OutFD; + } -LLIMessageType LLIChildTarget::waitForIncomingMessage() { - int32_t MsgType = -1; - if (ReadBytes(&MsgType, 4) > 0) - return (LLIMessageType)MsgType; - return LLI_Error; -} + if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr)) { + errs() << "Error loading program symbols.\n"; + return 1; + } -void LLIChildTarget::handleMessage(LLIMessageType messageType) { - switch (messageType) { - case LLI_AllocateSpace: - handleAllocateSpace(); - break; - case LLI_LoadCodeSection: - handleLoadSection(true); - break; - case LLI_LoadDataSection: - handleLoadSection(false); - break; - case LLI_Execute: - handleExecute(); - break; - case LLI_Terminate: - RT->stop(); - break; + auto SymbolLookup = [](const std::string &Name) { + return RTDyldMemoryManager::getSymbolAddressInProcess(Name); + }; + + FDRPCChannel Channel(InFD, OutFD); + typedef remote::OrcRemoteTargetServer<FDRPCChannel, HostOrcArch> JITServer; + JITServer Server(Channel, SymbolLookup); + + while (1) { + JITServer::JITProcId Id = JITServer::InvalidId; + if (auto EC = Server.getNextProcId(Id)) { + errs() << "Error: " << EC.message() << "\n"; + return 1; + } + switch (Id) { + case JITServer::TerminateSessionId: + return 0; default: - // FIXME: Handle error! - break; + if (auto EC = Server.handleKnownProcedure(Id)) { + errs() << "Error: " << EC.message() << "\n"; + return 1; + } + } } -} - -// Incoming message handlers -void LLIChildTarget::handleAllocateSpace() { - // Read and verify the message data size. - uint32_t DataSize = 0; - int rc = ReadBytes(&DataSize, 4); - (void)rc; - assert(rc == 4); - assert(DataSize == 8); - - // Read the message arguments. - uint32_t Alignment = 0; - uint32_t AllocSize = 0; - rc = ReadBytes(&Alignment, 4); - assert(rc == 4); - rc = ReadBytes(&AllocSize, 4); - assert(rc == 4); - - // Allocate the memory. - uint64_t Addr; - RT->allocateSpace(AllocSize, Alignment, Addr); - - // Send AllocationResult message. - sendAllocationResult(Addr); -} - -void LLIChildTarget::handleLoadSection(bool IsCode) { - // Read the message data size. - uint32_t DataSize = 0; - int rc = ReadBytes(&DataSize, 4); - (void)rc; - assert(rc == 4); - - // Read the target load address. - uint64_t Addr = 0; - rc = ReadBytes(&Addr, 8); - assert(rc == 8); - size_t BufferSize = DataSize - 8; - - if (!RT->isAllocatedMemory(Addr, BufferSize)) - return sendLoadStatus(LLI_Status_NotAllocated); - - // Read section data into previously allocated buffer - rc = ReadBytes((void*)Addr, BufferSize); - if (rc != (int)(BufferSize)) - return sendLoadStatus(LLI_Status_IncompleteMsg); - - // If IsCode, mark memory executable - if (IsCode) - sys::Memory::InvalidateInstructionCache((void *)Addr, BufferSize); - - // Send MarkLoadComplete message. - sendLoadStatus(LLI_Status_Success); -} - -void LLIChildTarget::handleExecute() { - // Read the message data size. 
- uint32_t DataSize = 0; - int rc = ReadBytes(&DataSize, 4); - (void)rc; - assert(rc == 4); - assert(DataSize == 8); - - // Read the target address. - uint64_t Addr = 0; - rc = ReadBytes(&Addr, 8); - assert(rc == 8); - - // Call function - int32_t Result = -1; - RT->executeCode(Addr, Result); - - // Send ExecutionResult message. - sendExecutionComplete(Result); -} - -// Outgoing message handlers -void LLIChildTarget::sendChildActive() { - // Write the message type. - uint32_t MsgType = (uint32_t)LLI_ChildActive; - int rc = WriteBytes(&MsgType, 4); - (void)rc; - assert(rc == 4); - - // Write the data size. - uint32_t DataSize = 0; - rc = WriteBytes(&DataSize, 4); - assert(rc == 4); -} - -void LLIChildTarget::sendAllocationResult(uint64_t Addr) { - // Write the message type. - uint32_t MsgType = (uint32_t)LLI_AllocationResult; - int rc = WriteBytes(&MsgType, 4); - (void)rc; - assert(rc == 4); - - // Write the data size. - uint32_t DataSize = 8; - rc = WriteBytes(&DataSize, 4); - assert(rc == 4); - - // Write the allocated address. - rc = WriteBytes(&Addr, 8); - assert(rc == 8); -} - -void LLIChildTarget::sendLoadStatus(uint32_t Status) { - // Write the message type. - uint32_t MsgType = (uint32_t)LLI_LoadResult; - int rc = WriteBytes(&MsgType, 4); - (void)rc; - assert(rc == 4); - - // Write the data size. - uint32_t DataSize = 4; - rc = WriteBytes(&DataSize, 4); - assert(rc == 4); - - // Write the result. - rc = WriteBytes(&Status, 4); - assert(rc == 4); -} - -void LLIChildTarget::sendExecutionComplete(int Result) { - // Write the message type. - uint32_t MsgType = (uint32_t)LLI_ExecutionResult; - int rc = WriteBytes(&MsgType, 4); - (void)rc; - assert(rc == 4); + close(InFD); + close(OutFD); - // Write the data size. - uint32_t DataSize = 4; - rc = WriteBytes(&DataSize, 4); - assert(rc == 4); - - // Write the result. - rc = WriteBytes(&Result, 4); - assert(rc == 4); + return 0; } - -#ifdef LLVM_ON_UNIX -#include "../Unix/RPCChannel.inc" -#endif - -#ifdef LLVM_ON_WIN32 -#include "../Windows/RPCChannel.inc" -#endif diff --git a/contrib/llvm/tools/lli/OrcLazyJIT.cpp b/contrib/llvm/tools/lli/OrcLazyJIT.cpp index 4235145..7f483f7 100644 --- a/contrib/llvm/tools/lli/OrcLazyJIT.cpp +++ b/contrib/llvm/tools/lli/OrcLazyJIT.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "OrcLazyJIT.h" -#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" #include <cstdio> diff --git a/contrib/llvm/tools/lli/OrcLazyJIT.h b/contrib/llvm/tools/lli/OrcLazyJIT.h index bb4da33..2a5b31d 100644 --- a/contrib/llvm/tools/lli/OrcLazyJIT.h +++ b/contrib/llvm/tools/lli/OrcLazyJIT.h @@ -105,7 +105,9 @@ public: // Add the module to the JIT. std::vector<std::unique_ptr<Module>> S; S.push_back(std::move(M)); - auto H = CODLayer.addModuleSet(std::move(S), nullptr, std::move(Resolver)); + auto H = CODLayer.addModuleSet(std::move(S), + llvm::make_unique<SectionMemoryManager>(), + std::move(Resolver)); // Run the static constructors, and save the static destructor runner for // execution when the JIT is torn down. 
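The lli changes that follow replace the hand-rolled RemoteTarget/RPCChannel machinery (deleted below) with Orc's remote-target client, for which the rewritten ChildTarget above serves as the other end. A minimal client-side sketch, assuming only names this patch introduces (launchRemote and FDRPCChannel appear below in RemoteJITUtils.h and lli.cpp, EE is lli's ExecutionEngine, error handling elided):

    // Start the ChildTarget server over a pipe pair, JIT locally, then run the
    // resulting int(void) entry point inside the child process.
    std::unique_ptr<FDRPCChannel> C = launchRemote();
    typedef orc::remote::OrcRemoteTargetClient<orc::remote::RPCChannel> MyRemote;
    ErrorOr<MyRemote> R = MyRemote::Create(*C);   // handshake with the child
    orc::TargetAddress Entry = EE->getFunctionAddress("main");
    EE->finalizeObject();                         // sections land in the child
    int Result = 0;
    R->callIntVoid(Result, Entry);                // executes in the child
    R->terminateSession();                        // ends the child's serve loop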
diff --git a/contrib/llvm/tools/lli/RPCChannel.h b/contrib/llvm/tools/lli/RPCChannel.h
deleted file mode 100644
index ebd3c65..0000000
--- a/contrib/llvm/tools/lli/RPCChannel.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===---------- RPCChannel.h - LLVM out-of-process JIT execution ----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Definition of the RemoteTargetExternal class which executes JITed code in a
-// separate process from where it was built.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLI_RPCCHANNEL_H
-#define LLVM_TOOLS_LLI_RPCCHANNEL_H
-
-#include <stdlib.h>
-#include <string>
-
-namespace llvm {
-
-class RPCChannel {
-public:
-  std::string ChildName;
-
-  RPCChannel() {}
-  ~RPCChannel();
-
-  /// Start the remote process.
-  ///
-  /// @returns True on success. On failure, ErrorMsg is updated with
-  /// descriptive text of the encountered error.
-  bool createServer();
-
-  bool createClient();
-
-  // This will get filled in as a point to an OS-specific structure.
-  void *ConnectionData;
-
-  bool WriteBytes(const void *Data, size_t Size);
-  bool ReadBytes(void *Data, size_t Size);
-
-  void Wait();
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/tools/lli/RemoteJITUtils.h b/contrib/llvm/tools/lli/RemoteJITUtils.h
new file mode 100644
index 0000000..a3f3fa0
--- /dev/null
+++ b/contrib/llvm/tools/lli/RemoteJITUtils.h
@@ -0,0 +1,131 @@
+//===-- RemoteJITUtils.h - Utilities for remote-JITing with LLI -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for remote-JITing with LLI.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLI_REMOTEJITUTILS_H
+#define LLVM_TOOLS_LLI_REMOTEJITUTILS_H
+
+#include "llvm/ExecutionEngine/Orc/RPCChannel.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
+
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
+/// RPC channel that reads from and writes to file descriptors.
+class FDRPCChannel : public llvm::orc::remote::RPCChannel {
+public:
+  FDRPCChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {}
+
+  std::error_code readBytes(char *Dst, unsigned Size) override {
+    assert(Dst && "Attempt to read into null.");
+    ssize_t ReadResult = ::read(InFD, Dst, Size);
+    if (ReadResult != (ssize_t)Size)
+      return std::error_code(errno, std::generic_category());
+    return std::error_code();
+  }
+
+  std::error_code appendBytes(const char *Src, unsigned Size) override {
+    assert(Src && "Attempt to append from null.");
+    ssize_t WriteResult = ::write(OutFD, Src, Size);
+    if (WriteResult != (ssize_t)Size)
+      return std::error_code(errno, std::generic_category());
+    return std::error_code();
+  }
+
+  std::error_code send() override { return std::error_code(); }
+
+private:
+  int InFD, OutFD;
+};
+
+// Launch the remote process (see lli.cpp) and return a channel to it.
+std::unique_ptr<FDRPCChannel> launchRemote();
+
+namespace llvm {
+
+// ForwardingMM - Adapter to connect MCJIT to Orc's Remote memory manager.
+class ForwardingMemoryManager : public llvm::RTDyldMemoryManager { +public: + void setMemMgr(std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr) { + this->MemMgr = std::move(MemMgr); + } + + void setResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> Resolver) { + this->Resolver = std::move(Resolver); + } + + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + StringRef SectionName) override { + return MemMgr->allocateCodeSection(Size, Alignment, SectionID, SectionName); + } + + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, StringRef SectionName, + bool IsReadOnly) override { + return MemMgr->allocateDataSection(Size, Alignment, SectionID, SectionName, + IsReadOnly); + } + + void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + uintptr_t RODataSize, uint32_t RODataAlign, + uintptr_t RWDataSize, + uint32_t RWDataAlign) override { + MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, + RWDataSize, RWDataAlign); + } + + bool needsToReserveAllocationSpace() override { + return MemMgr->needsToReserveAllocationSpace(); + } + + void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, + size_t Size) override { + MemMgr->registerEHFrames(Addr, LoadAddr, Size); + } + + void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, + size_t Size) override { + MemMgr->deregisterEHFrames(Addr, LoadAddr, Size); + } + + bool finalizeMemory(std::string *ErrMsg = nullptr) override { + return MemMgr->finalizeMemory(ErrMsg); + } + + void notifyObjectLoaded(RuntimeDyld &RTDyld, + const object::ObjectFile &Obj) override { + MemMgr->notifyObjectLoaded(RTDyld, Obj); + } + + // Don't hide the sibling notifyObjectLoaded from RTDyldMemoryManager. + using RTDyldMemoryManager::notifyObjectLoaded; + + RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override { + return Resolver->findSymbol(Name); + } + + RuntimeDyld::SymbolInfo + findSymbolInLogicalDylib(const std::string &Name) override { + return Resolver->findSymbolInLogicalDylib(Name); + } + +private: + std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr; + std::unique_ptr<RuntimeDyld::SymbolResolver> Resolver; +}; +} + +#endif diff --git a/contrib/llvm/tools/lli/RemoteMemoryManager.cpp b/contrib/llvm/tools/lli/RemoteMemoryManager.cpp deleted file mode 100644 index 0a16210..0000000 --- a/contrib/llvm/tools/lli/RemoteMemoryManager.cpp +++ /dev/null @@ -1,174 +0,0 @@ -//===---- RemoteMemoryManager.cpp - Recording memory manager --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This memory manager allocates local storage and keeps a record of each -// allocation. Iterators are provided for all data and code allocations. 
-// -//===----------------------------------------------------------------------===// - -#include "RemoteMemoryManager.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "lli" - -RemoteMemoryManager::~RemoteMemoryManager() { - for (SmallVector<Allocation, 2>::iterator - I = AllocatedSections.begin(), E = AllocatedSections.end(); - I != E; ++I) - sys::Memory::releaseMappedMemory(I->MB); -} - -uint8_t *RemoteMemoryManager:: -allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, - StringRef SectionName) { - // The recording memory manager is just a local copy of the remote target. - // The alignment requirement is just stored here for later use. Regular - // heap storage is sufficient here, but we're using mapped memory to work - // around a bug in MCJIT. - sys::MemoryBlock Block = allocateSection(Size); - // AllocatedSections will own this memory. - AllocatedSections.push_back( Allocation(Block, Alignment, true) ); - // UnmappedSections has the same information but does not own the memory. - UnmappedSections.push_back( Allocation(Block, Alignment, true) ); - return (uint8_t*)Block.base(); -} - -uint8_t *RemoteMemoryManager:: -allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, StringRef SectionName, - bool IsReadOnly) { - // The recording memory manager is just a local copy of the remote target. - // The alignment requirement is just stored here for later use. Regular - // heap storage is sufficient here, but we're using mapped memory to work - // around a bug in MCJIT. - sys::MemoryBlock Block = allocateSection(Size); - // AllocatedSections will own this memory. - AllocatedSections.push_back( Allocation(Block, Alignment, false) ); - // UnmappedSections has the same information but does not own the memory. - UnmappedSections.push_back( Allocation(Block, Alignment, false) ); - return (uint8_t*)Block.base(); -} - -sys::MemoryBlock RemoteMemoryManager::allocateSection(uintptr_t Size) { - std::error_code ec; - sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(Size, - &Near, - sys::Memory::MF_READ | - sys::Memory::MF_WRITE, - ec); - assert(!ec && MB.base()); - - // FIXME: This is part of a work around to keep sections near one another - // when MCJIT performs relocations after code emission but before - // the generated code is moved to the remote target. - // Save this address as the basis for our next request - Near = MB; - return MB; -} - -void RemoteMemoryManager::notifyObjectLoaded(ExecutionEngine *EE, - const object::ObjectFile &Obj) { - // The client should have called setRemoteTarget() before triggering any - // code generation. - assert(Target); - if (!Target) - return; - - // FIXME: Make this function thread safe. - - // Lay out our sections in order, with all the code sections first, then - // all the data sections. - uint64_t CurOffset = 0; - unsigned MaxAlign = Target->getPageAlignment(); - SmallVector<std::pair<Allocation, uint64_t>, 16> Offsets; - unsigned NumSections = UnmappedSections.size(); - // We're going to go through the list twice to separate code and data, but - // it's a very small list, so that's OK. 
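The two layout passes below rely on the standard integer align-up idiom: with CurOffset = 13 and Align = 8, (13 + 8 - 1) / 8 * 8 = 16. The same computation as a stand-alone helper (illustrative only, not part of the patch):

    // Round Offset up to the next multiple of Align; Align must be non-zero.
    static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }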
- for (size_t i = 0, e = NumSections; i != e; ++i) { - Allocation &Section = UnmappedSections[i]; - if (Section.IsCode) { - unsigned Size = Section.MB.size(); - unsigned Align = Section.Alignment; - DEBUG(dbgs() << "code region: size " << Size - << ", alignment " << Align << "\n"); - // Align the current offset up to whatever is needed for the next - // section. - CurOffset = (CurOffset + Align - 1) / Align * Align; - // Save off the address of the new section and allocate its space. - Offsets.push_back(std::pair<Allocation,uint64_t>(Section, CurOffset)); - CurOffset += Size; - } - } - // Adjust to keep code and data aligned on separate pages. - CurOffset = (CurOffset + MaxAlign - 1) / MaxAlign * MaxAlign; - for (size_t i = 0, e = NumSections; i != e; ++i) { - Allocation &Section = UnmappedSections[i]; - if (!Section.IsCode) { - unsigned Size = Section.MB.size(); - unsigned Align = Section.Alignment; - DEBUG(dbgs() << "data region: size " << Size - << ", alignment " << Align << "\n"); - // Align the current offset up to whatever is needed for the next - // section. - CurOffset = (CurOffset + Align - 1) / Align * Align; - // Save off the address of the new section and allocate its space. - Offsets.push_back(std::pair<Allocation,uint64_t>(Section, CurOffset)); - CurOffset += Size; - } - } - - // Allocate space in the remote target. - uint64_t RemoteAddr; - if (!Target->allocateSpace(CurOffset, MaxAlign, RemoteAddr)) - report_fatal_error(Target->getErrorMsg()); - - // Map the section addresses so relocations will get updated in the local - // copies of the sections. - for (unsigned i = 0, e = Offsets.size(); i != e; ++i) { - uint64_t Addr = RemoteAddr + Offsets[i].second; - EE->mapSectionAddress(const_cast<void*>(Offsets[i].first.MB.base()), Addr); - - DEBUG(dbgs() << " Mapping local: " << Offsets[i].first.MB.base() - << " to remote: 0x" << format("%llx", Addr) << "\n"); - - MappedSections[Addr] = Offsets[i].first; - } - - UnmappedSections.clear(); -} - -bool RemoteMemoryManager::finalizeMemory(std::string *ErrMsg) { - // FIXME: Make this function thread safe. - for (DenseMap<uint64_t, Allocation>::iterator - I = MappedSections.begin(), E = MappedSections.end(); - I != E; ++I) { - uint64_t RemoteAddr = I->first; - const Allocation &Section = I->second; - if (Section.IsCode) { - if (!Target->loadCode(RemoteAddr, Section.MB.base(), Section.MB.size())) - report_fatal_error(Target->getErrorMsg()); - DEBUG(dbgs() << " loading code: " << Section.MB.base() - << " to remote: 0x" << format("%llx", RemoteAddr) << "\n"); - } else { - if (!Target->loadData(RemoteAddr, Section.MB.base(), Section.MB.size())) - report_fatal_error(Target->getErrorMsg()); - DEBUG(dbgs() << " loading data: " << Section.MB.base() - << " to remote: 0x" << format("%llx", RemoteAddr) << "\n"); - } - } - - MappedSections.clear(); - - return false; -} diff --git a/contrib/llvm/tools/lli/RemoteMemoryManager.h b/contrib/llvm/tools/lli/RemoteMemoryManager.h deleted file mode 100644 index 5733fa5..0000000 --- a/contrib/llvm/tools/lli/RemoteMemoryManager.h +++ /dev/null @@ -1,101 +0,0 @@ -//===- RemoteMemoryManager.h - LLI MCJIT recording memory manager ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This memory manager allocates local storage and keeps a record of each -// allocation. 
Iterators are provided for all data and code allocations. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLI_REMOTEMEMORYMANAGER_H -#define LLVM_TOOLS_LLI_REMOTEMEMORYMANAGER_H - -#include "RemoteTarget.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Memory.h" -#include <utility> - -namespace llvm { - -class RemoteMemoryManager : public RTDyldMemoryManager { -public: - // Notice that this structure takes ownership of the memory allocated. - struct Allocation { - Allocation() {} - Allocation(sys::MemoryBlock mb, unsigned a, bool code) - : MB(mb), Alignment(a), IsCode(code) {} - - sys::MemoryBlock MB; - unsigned Alignment; - bool IsCode; - }; - -private: - // This vector contains Allocation objects for all sections which we have - // allocated. This vector effectively owns the memory associated with the - // allocations. - SmallVector<Allocation, 2> AllocatedSections; - - // This vector contains pointers to Allocation objects for any sections we - // have allocated locally but have not yet remapped for the remote target. - // When we receive notification of a completed module load, we will map - // these sections into the remote target. - SmallVector<Allocation, 2> UnmappedSections; - - // This map tracks the sections we have remapped for the remote target - // but have not yet copied to the target. - DenseMap<uint64_t, Allocation> MappedSections; - - // FIXME: This is part of a work around to keep sections near one another - // when MCJIT performs relocations after code emission but before - // the generated code is moved to the remote target. - sys::MemoryBlock Near; - sys::MemoryBlock allocateSection(uintptr_t Size); - - RemoteTarget *Target; - -public: - RemoteMemoryManager() : Target(nullptr) {} - ~RemoteMemoryManager() override; - - uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, - StringRef SectionName) override; - - uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, StringRef SectionName, - bool IsReadOnly) override; - - // For now, remote symbol resolution is not support in lli. The MCJIT - // interface does support this, but clients must provide their own - // mechanism for finding remote symbol addresses. MCJIT will resolve - // symbols from Modules it contains. - uint64_t getSymbolAddress(const std::string &Name) override { return 0; } - - void notifyObjectLoaded(ExecutionEngine *EE, - const object::ObjectFile &Obj) override; - - bool finalizeMemory(std::string *ErrMsg) override; - - // For now, remote EH frame registration isn't supported. Remote symbol - // resolution is a prerequisite to supporting remote EH frame registration. 
- void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override {} - void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override {} - - // This is a non-interface function used by lli - void setRemoteTarget(RemoteTarget *T) { Target = T; } -}; - -} // end namespace llvm - -#endif diff --git a/contrib/llvm/tools/lli/RemoteTarget.cpp b/contrib/llvm/tools/lli/RemoteTarget.cpp deleted file mode 100644 index 95e1511..0000000 --- a/contrib/llvm/tools/lli/RemoteTarget.cpp +++ /dev/null @@ -1,71 +0,0 @@ -//===- RemoteTarget.cpp - LLVM Remote process JIT execution -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of the RemoteTarget class which executes JITed code in a -// separate address range from where it was built. -// -//===----------------------------------------------------------------------===// - -#include "RemoteTarget.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Memory.h" -#include <stdlib.h> -#include <string> - -using namespace llvm; - -//////////////////////////////////////////////////////////////////////////////// -// Simulated remote execution -// -// This implementation will simply move generated code and data to a new memory -// location in the current executable and let it run from there. -//////////////////////////////////////////////////////////////////////////////// - -bool RemoteTarget::allocateSpace(size_t Size, unsigned Alignment, - uint64_t &Address) { - sys::MemoryBlock *Prev = Allocations.size() ? &Allocations.back() : nullptr; - sys::MemoryBlock Mem = sys::Memory::AllocateRWX(Size, Prev, &ErrorMsg); - if (Mem.base() == nullptr) - return false; - if ((uintptr_t)Mem.base() % Alignment) { - ErrorMsg = "unable to allocate sufficiently aligned memory"; - return false; - } - Address = reinterpret_cast<uint64_t>(Mem.base()); - Allocations.push_back(Mem); - return true; -} - -bool RemoteTarget::loadData(uint64_t Address, const void *Data, size_t Size) { - memcpy ((void*)Address, Data, Size); - return true; -} - -bool RemoteTarget::loadCode(uint64_t Address, const void *Data, size_t Size) { - memcpy ((void*)Address, Data, Size); - sys::MemoryBlock Mem((void*)Address, Size); - sys::Memory::setExecutable(Mem, &ErrorMsg); - return true; -} - -bool RemoteTarget::executeCode(uint64_t Address, int &RetVal) { - int (*fn)() = (int(*)())Address; - RetVal = fn(); - return true; -} - -bool RemoteTarget::create() { - return true; -} - -void RemoteTarget::stop() { - for (unsigned i = 0, e = Allocations.size(); i != e; ++i) - sys::Memory::ReleaseRWX(Allocations[i]); -} diff --git a/contrib/llvm/tools/lli/RemoteTarget.h b/contrib/llvm/tools/lli/RemoteTarget.h deleted file mode 100644 index ee758a2..0000000 --- a/contrib/llvm/tools/lli/RemoteTarget.h +++ /dev/null @@ -1,122 +0,0 @@ -//===- RemoteTarget.h - LLVM Remote process JIT execution ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Definition of the RemoteTarget class which executes JITed code in a -// separate address range from where it was built. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLI_REMOTETARGET_H -#define LLVM_TOOLS_LLI_REMOTETARGET_H - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Memory.h" -#include <stdlib.h> -#include <string> - -namespace llvm { - -class RemoteTarget { - bool IsRunning; - - typedef SmallVector<sys::MemoryBlock, 16> AllocMapType; - AllocMapType Allocations; - -protected: - std::string ErrorMsg; - -public: - StringRef getErrorMsg() const { return ErrorMsg; } - - /// Allocate space in the remote target address space. - /// - /// @param Size Amount of space, in bytes, to allocate. - /// @param Alignment Required minimum alignment for allocated space. - /// @param[out] Address Remote address of the allocated memory. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - virtual bool allocateSpace(size_t Size, - unsigned Alignment, - uint64_t &Address); - - bool isAllocatedMemory(uint64_t Address, uint32_t Size) { - uint64_t AddressEnd = Address + Size; - for (AllocMapType::const_iterator I = Allocations.begin(), - E = Allocations.end(); - I != E; ++I) { - if (Address >= (uint64_t)I->base() && - AddressEnd <= (uint64_t)I->base() + I->size()) - return true; - } - return false; - } - - /// Load data into the target address space. - /// - /// @param Address Destination address in the target process. - /// @param Data Source address in the host process. - /// @param Size Number of bytes to copy. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - virtual bool loadData(uint64_t Address, - const void *Data, - size_t Size); - - /// Load code into the target address space and prepare it for execution. - /// - /// @param Address Destination address in the target process. - /// @param Data Source address in the host process. - /// @param Size Number of bytes to copy. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - virtual bool loadCode(uint64_t Address, - const void *Data, - size_t Size); - - /// Execute code in the target process. The called function is required - /// to be of signature int "(*)(void)". - /// - /// @param Address Address of the loaded function in the target - /// process. - /// @param[out] RetVal The integer return value of the called function. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - virtual bool executeCode(uint64_t Address, - int &RetVal); - - /// Minimum alignment for memory permissions. Used to separate code and - /// data regions to make sure data doesn't get marked as code or vice - /// versa. - /// - /// @returns Page alignment return value. Default of 4k. - virtual unsigned getPageAlignment() { return 4096; } - - /// Start the remote process. - virtual bool create(); - - /// Terminate the remote process. - virtual void stop(); - - RemoteTarget() : IsRunning(false), ErrorMsg("") {} - virtual ~RemoteTarget() { if (IsRunning) stop(); } -private: - // Main processing function for the remote target process. Command messages - // are received on file descriptor CmdFD and responses come back on OutFD. 
- static void doRemoteTargeting(int CmdFD, int OutFD); -}; - -} // end namespace llvm - -#endif diff --git a/contrib/llvm/tools/lli/RemoteTargetExternal.cpp b/contrib/llvm/tools/lli/RemoteTargetExternal.cpp deleted file mode 100644 index fe46248..0000000 --- a/contrib/llvm/tools/lli/RemoteTargetExternal.cpp +++ /dev/null @@ -1,327 +0,0 @@ -//===---- RemoteTargetExternal.cpp - LLVM out-of-process JIT execution ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of the RemoteTargetExternal class which executes JITed code -// in a separate process from where it was built. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Config/config.h" -#include "RemoteTarget.h" -#include "RemoteTargetExternal.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/raw_ostream.h" -#include <string> - -using namespace llvm; - -#define DEBUG_TYPE "lli" - -bool RemoteTargetExternal::allocateSpace(size_t Size, unsigned Alignment, - uint64_t &Address) { - DEBUG(dbgs() << "Message [allocate space] size: " << Size << - ", align: " << Alignment << "\n"); - if (!SendAllocateSpace(Alignment, Size)) { - ErrorMsg += ", (RemoteTargetExternal::allocateSpace)"; - return false; - } - if (!Receive(LLI_AllocationResult, Address)) { - ErrorMsg += ", (RemoteTargetExternal::allocateSpace)"; - return false; - } - if (Address == 0) { - ErrorMsg += "failed allocation, (RemoteTargetExternal::allocateSpace)"; - return false; - } - DEBUG(dbgs() << "Message [allocate space] addr: 0x" << - format("%llx", Address) << "\n"); - return true; -} - -bool RemoteTargetExternal::loadData(uint64_t Address, const void *Data, size_t Size) { - DEBUG(dbgs() << "Message [load data] addr: 0x" << format("%llx", Address) << - ", size: " << Size << "\n"); - if (!SendLoadSection(Address, Data, (uint32_t)Size, false)) { - ErrorMsg += ", (RemoteTargetExternal::loadData)"; - return false; - } - int Status = LLI_Status_Success; - if (!Receive(LLI_LoadResult, Status)) { - ErrorMsg += ", (RemoteTargetExternal::loadData)"; - return false; - } - if (Status == LLI_Status_IncompleteMsg) { - ErrorMsg += "incomplete load data, (RemoteTargetExternal::loadData)"; - return false; - } - if (Status == LLI_Status_NotAllocated) { - ErrorMsg += "data memory not allocated, (RemoteTargetExternal::loadData)"; - return false; - } - DEBUG(dbgs() << "Message [load data] complete\n"); - return true; -} - -bool RemoteTargetExternal::loadCode(uint64_t Address, const void *Data, size_t Size) { - DEBUG(dbgs() << "Message [load code] addr: 0x" << format("%llx", Address) << - ", size: " << Size << "\n"); - if (!SendLoadSection(Address, Data, (uint32_t)Size, true)) { - ErrorMsg += ", (RemoteTargetExternal::loadCode)"; - return false; - } - int Status = LLI_Status_Success; - if (!Receive(LLI_LoadResult, Status)) { - ErrorMsg += ", (RemoteTargetExternal::loadCode)"; - return false; - } - if (Status == LLI_Status_IncompleteMsg) { - ErrorMsg += "incomplete load data, (RemoteTargetExternal::loadData)"; - return false; - } - if (Status == LLI_Status_NotAllocated) { - ErrorMsg += "data memory not allocated, (RemoteTargetExternal::loadData)"; - 
return false; - } - DEBUG(dbgs() << "Message [load code] complete\n"); - return true; -} - -bool RemoteTargetExternal::executeCode(uint64_t Address, int32_t &RetVal) { - DEBUG(dbgs() << "Message [exectue code] addr: " << Address << "\n"); - if (!SendExecute(Address)) { - ErrorMsg += ", (RemoteTargetExternal::executeCode)"; - return false; - } - if (!Receive(LLI_ExecutionResult, RetVal)) { - ErrorMsg += ", (RemoteTargetExternal::executeCode)"; - return false; - } - DEBUG(dbgs() << "Message [exectue code] return: " << RetVal << "\n"); - return true; -} - -void RemoteTargetExternal::stop() { - SendTerminate(); - RPC.Wait(); -} - -bool RemoteTargetExternal::SendAllocateSpace(uint32_t Alignment, uint32_t Size) { - if (!SendHeader(LLI_AllocateSpace)) { - ErrorMsg += ", (RemoteTargetExternal::SendAllocateSpace)"; - return false; - } - - AppendWrite((const void *)&Alignment, 4); - AppendWrite((const void *)&Size, 4); - - if (!SendPayload()) { - ErrorMsg += ", (RemoteTargetExternal::SendAllocateSpace)"; - return false; - } - return true; -} - -bool RemoteTargetExternal::SendLoadSection(uint64_t Addr, - const void *Data, - uint32_t Size, - bool IsCode) { - LLIMessageType MsgType = IsCode ? LLI_LoadCodeSection : LLI_LoadDataSection; - if (!SendHeader(MsgType)) { - ErrorMsg += ", (RemoteTargetExternal::SendLoadSection)"; - return false; - } - - AppendWrite((const void *)&Addr, 8); - AppendWrite(Data, Size); - - if (!SendPayload()) { - ErrorMsg += ", (RemoteTargetExternal::SendLoadSection)"; - return false; - } - return true; -} - -bool RemoteTargetExternal::SendExecute(uint64_t Addr) { - if (!SendHeader(LLI_Execute)) { - ErrorMsg += ", (RemoteTargetExternal::SendExecute)"; - return false; - } - - AppendWrite((const void *)&Addr, 8); - - if (!SendPayload()) { - ErrorMsg += ", (RemoteTargetExternal::SendExecute)"; - return false; - } - return true; -} - -bool RemoteTargetExternal::SendTerminate() { - return SendHeader(LLI_Terminate); - // No data or data size is sent with Terminate -} - -bool RemoteTargetExternal::Receive(LLIMessageType Msg) { - if (!ReceiveHeader(Msg)) - return false; - int Unused; - AppendRead(&Unused, 0); - if (!ReceivePayload()) - return false; - ReceiveData.clear(); - Sizes.clear(); - return true; -} - -bool RemoteTargetExternal::Receive(LLIMessageType Msg, int32_t &Data) { - if (!ReceiveHeader(Msg)) - return false; - AppendRead(&Data, 4); - if (!ReceivePayload()) - return false; - ReceiveData.clear(); - Sizes.clear(); - return true; -} - -bool RemoteTargetExternal::Receive(LLIMessageType Msg, uint64_t &Data) { - if (!ReceiveHeader(Msg)) - return false; - AppendRead(&Data, 8); - if (!ReceivePayload()) - return false; - ReceiveData.clear(); - Sizes.clear(); - return true; -} - -bool RemoteTargetExternal::ReceiveHeader(LLIMessageType ExpectedMsgType) { - assert(ReceiveData.empty() && Sizes.empty() && - "Payload vector not empty to receive header"); - - // Message header, with type to follow - uint32_t MsgType; - if (!ReadBytes(&MsgType, 4)) { - ErrorMsg += ", (RemoteTargetExternal::ReceiveHeader)"; - return false; - } - if (MsgType != (uint32_t)ExpectedMsgType) { - ErrorMsg = "received unexpected message type"; - ErrorMsg += ". 
Expecting: "; - ErrorMsg += ExpectedMsgType; - ErrorMsg += ", Got: "; - ErrorMsg += MsgType; - return false; - } - return true; -} - -bool RemoteTargetExternal::ReceivePayload() { - assert(!ReceiveData.empty() && - "Payload vector empty to receive"); - assert(ReceiveData.size() == Sizes.size() && - "Unexpected mismatch between data and size"); - - uint32_t TotalSize = 0; - for (int I=0, E=Sizes.size(); I < E; I++) - TotalSize += Sizes[I]; - - // Payload size header - uint32_t DataSize; - if (!ReadBytes(&DataSize, 4)) { - ErrorMsg += ", invalid data size"; - return false; - } - if (DataSize != TotalSize) { - ErrorMsg = "unexpected data size"; - ErrorMsg += ". Expecting: "; - ErrorMsg += TotalSize; - ErrorMsg += ", Got: "; - ErrorMsg += DataSize; - return false; - } - if (DataSize == 0) - return true; - - // Payload itself - for (int I=0, E=Sizes.size(); I < E; I++) { - if (!ReadBytes(ReceiveData[I], Sizes[I])) { - ErrorMsg = "unexpected data while reading message"; - return false; - } - } - - return true; -} - -bool RemoteTargetExternal::SendHeader(LLIMessageType MsgType) { - assert(SendData.empty() && Sizes.empty() && - "Payload vector not empty to send header"); - - // Message header, with type to follow - if (!WriteBytes(&MsgType, 4)) { - ErrorMsg += ", (RemoteTargetExternal::SendHeader)"; - return false; - } - return true; -} - -bool RemoteTargetExternal::SendPayload() { - assert(!SendData.empty() && !Sizes.empty() && - "Payload vector empty to send"); - assert(SendData.size() == Sizes.size() && - "Unexpected mismatch between data and size"); - - uint32_t TotalSize = 0; - for (int I=0, E=Sizes.size(); I < E; I++) - TotalSize += Sizes[I]; - - // Payload size header - if (!WriteBytes(&TotalSize, 4)) { - ErrorMsg += ", invalid data size"; - return false; - } - if (TotalSize == 0) - return true; - - // Payload itself - for (int I=0, E=Sizes.size(); I < E; I++) { - if (!WriteBytes(SendData[I], Sizes[I])) { - ErrorMsg = "unexpected data while writing message"; - return false; - } - } - - SendData.clear(); - Sizes.clear(); - return true; -} - -void RemoteTargetExternal::AppendWrite(const void *Data, uint32_t Size) { - SendData.push_back(Data); - Sizes.push_back(Size); -} - -void RemoteTargetExternal::AppendRead(void *Data, uint32_t Size) { - ReceiveData.push_back(Data); - Sizes.push_back(Size); -} - -#ifdef LLVM_ON_UNIX -#include "Unix/RPCChannel.inc" -#endif - -#ifdef LLVM_ON_WIN32 -#include "Windows/RPCChannel.inc" -#endif diff --git a/contrib/llvm/tools/lli/RemoteTargetExternal.h b/contrib/llvm/tools/lli/RemoteTargetExternal.h deleted file mode 100644 index afe8570..0000000 --- a/contrib/llvm/tools/lli/RemoteTargetExternal.h +++ /dev/null @@ -1,143 +0,0 @@ -//===----- RemoteTargetExternal.h - LLVM out-of-process JIT execution -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Definition of the RemoteTargetExternal class which executes JITed code in a -// separate process from where it was built. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLI_REMOTETARGETEXTERNAL_H -#define LLVM_TOOLS_LLI_REMOTETARGETEXTERNAL_H - -#include "RPCChannel.h" -#include "RemoteTarget.h" -#include "RemoteTargetMessage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Config/config.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Memory.h" -#include <stdlib.h> -#include <string> - -namespace llvm { - -class RemoteTargetExternal : public RemoteTarget { - RPCChannel RPC; - - bool WriteBytes(const void *Data, size_t Size) { - return RPC.WriteBytes(Data, Size); - } - - bool ReadBytes(void *Data, size_t Size) { return RPC.ReadBytes(Data, Size); } - -public: - /// Allocate space in the remote target address space. - /// - /// @param Size Amount of space, in bytes, to allocate. - /// @param Alignment Required minimum alignment for allocated space. - /// @param[out] Address Remote address of the allocated memory. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - bool allocateSpace(size_t Size, unsigned Alignment, - uint64_t &Address) override; - - /// Load data into the target address space. - /// - /// @param Address Destination address in the target process. - /// @param Data Source address in the host process. - /// @param Size Number of bytes to copy. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - bool loadData(uint64_t Address, const void *Data, size_t Size) override; - - /// Load code into the target address space and prepare it for execution. - /// - /// @param Address Destination address in the target process. - /// @param Data Source address in the host process. - /// @param Size Number of bytes to copy. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - bool loadCode(uint64_t Address, const void *Data, size_t Size) override; - - /// Execute code in the target process. The called function is required - /// to be of signature int "(*)(void)". - /// - /// @param Address Address of the loaded function in the target - /// process. - /// @param[out] RetVal The integer return value of the called function. - /// - /// @returns True on success. On failure, ErrorMsg is updated with - /// descriptive text of the encountered error. - bool executeCode(uint64_t Address, int &RetVal) override; - - /// Minimum alignment for memory permissions. Used to separate code and - /// data regions to make sure data doesn't get marked as code or vice - /// versa. - /// - /// @returns Page alignment return value. Default of 4k. - unsigned getPageAlignment() override { return 4096; } - - bool create() override { - RPC.ChildName = ChildName; - if (!RPC.createServer()) - return true; - - // We must get Ack from the client (blocking read) - if (!Receive(LLI_ChildActive)) { - ErrorMsg += ", (RPCChannel::create) - Stopping process!"; - stop(); - return false; - } - - return true; - } - - /// Terminate the remote process. 
- void stop() override; - - RemoteTargetExternal(std::string &Name) : RemoteTarget(), ChildName(Name) {} - ~RemoteTargetExternal() override {} - -private: - std::string ChildName; - - bool SendAllocateSpace(uint32_t Alignment, uint32_t Size); - bool SendLoadSection(uint64_t Addr, - const void *Data, - uint32_t Size, - bool IsCode); - bool SendExecute(uint64_t Addr); - bool SendTerminate(); - - // High-level wrappers for receiving data - bool Receive(LLIMessageType Msg); - bool Receive(LLIMessageType Msg, int32_t &Data); - bool Receive(LLIMessageType Msg, uint64_t &Data); - - // Lower level target-independent read/write to deal with errors - bool ReceiveHeader(LLIMessageType Msg); - bool ReceivePayload(); - bool SendHeader(LLIMessageType Msg); - bool SendPayload(); - - // Functions to append/retrieve data from the payload - SmallVector<const void *, 2> SendData; - SmallVector<void *, 1> ReceiveData; // Future proof - SmallVector<int, 2> Sizes; - void AppendWrite(const void *Data, uint32_t Size); - void AppendRead(void *Data, uint32_t Size); -}; - -} // end namespace llvm - -#endif diff --git a/contrib/llvm/tools/lli/RemoteTargetMessage.h b/contrib/llvm/tools/lli/RemoteTargetMessage.h deleted file mode 100644 index c210e4b..0000000 --- a/contrib/llvm/tools/lli/RemoteTargetMessage.h +++ /dev/null @@ -1,85 +0,0 @@ -//===---- RemoteTargetMessage.h - LLI out-of-process message protocol -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Definition of the LLIMessageType enum which is used for communication with a -// child process for remote execution. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLI_REMOTETARGETMESSAGE_H -#define LLVM_TOOLS_LLI_REMOTETARGETMESSAGE_H - -namespace llvm { - -// LLI messages from parent-to-child or vice versa follow an exceedingly simple -// protocol where the first four bytes represent the message type, the next -// four bytes represent the size of data for the command and following bytes -// represent the actual data. -// -// The protocol is not intended to be robust, secure or fault-tolerant. It is -// only here for testing purposes and is therefore intended to be the simplest -// implementation that will work. It is assumed that the parent and child -// share characteristics like endianness. -// -// Quick description of the protocol: -// -// { Header + Payload Size + Payload } -// -// The protocol message consist of a header, the payload size (which can be -// zero), and the payload itself. The payload can contain any number of items, -// and the size has to be the sum of them all. Each end is responsible for -// reading/writing the correct number of items with the correct sizes. 
-// -// The current four known exchanges are: -// -// * Allocate Space: -// Parent: { LLI_AllocateSpace, 8, Alignment, Size } -// Child: { LLI_AllocationResult, 8, Address } -// -// * Load Data: -// Parent: { LLI_LoadDataSection, 8+Size, Address, Data } -// Child: { LLI_LoadComplete, 4, StatusCode } -// -// * Load Code: -// Parent: { LLI_LoadCodeSection, 8+Size, Address, Code } -// Child: { LLI_LoadComplete, 4, StatusCode } -// -// * Execute Code: -// Parent: { LLI_Execute, 8, Address } -// Child: { LLI_ExecutionResult, 4, Result } -// -// It is the responsibility of either side to check for correct headers, -// sizes and payloads, since any inconsistency would misalign the pipe, and -// result in data corruption. - -enum LLIMessageType { - LLI_Error = -1, - LLI_ChildActive = 0, // Data = not used - LLI_AllocateSpace, // Data = struct { uint32_t Align, uint_32t Size } - LLI_AllocationResult, // Data = uint64_t Address (child memory space) - - LLI_LoadCodeSection, // Data = uint64_t Address, void * SectionData - LLI_LoadDataSection, // Data = uint64_t Address, void * SectionData - LLI_LoadResult, // Data = uint32_t LLIMessageStatus - - LLI_Execute, // Data = uint64_t Address - LLI_ExecutionResult, // Data = uint32_t Result - - LLI_Terminate // Data = not used -}; - -enum LLIMessageStatus { - LLI_Status_Success = 0, // Operation succeeded - LLI_Status_NotAllocated, // Address+Size not allocated in child space - LLI_Status_IncompleteMsg // Size received doesn't match request -}; - -} // end namespace llvm - -#endif diff --git a/contrib/llvm/tools/lli/Unix/RPCChannel.inc b/contrib/llvm/tools/lli/Unix/RPCChannel.inc deleted file mode 100644 index 6a9ae14..0000000 --- a/contrib/llvm/tools/lli/Unix/RPCChannel.inc +++ /dev/null @@ -1,122 +0,0 @@ -//=- RPCChannel.inc - LLVM out-of-process JIT execution for Unix --=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of the Unix-specific parts of the RPCChannel class -// which executes JITed code in a separate process from where it was built. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/Errno.h" -#include "llvm/Support/raw_ostream.h" -#include <stdio.h> -#include <stdlib.h> -#include <sys/wait.h> -#include <unistd.h> - -namespace { - -struct ConnectionData_t { - int InputPipe; - int OutputPipe; - - ConnectionData_t(int in, int out) : InputPipe(in), OutputPipe(out) {} -}; - -} // namespace - -namespace llvm { - -bool RPCChannel::createServer() { - int PipeFD[2][2]; - pid_t ChildPID; - - // Create two pipes. - if (pipe(PipeFD[0]) != 0 || pipe(PipeFD[1]) != 0) - perror("Error creating pipe: "); - - ChildPID = fork(); - - if (ChildPID == 0) { - // In the child... - - // Close the parent ends of the pipes - close(PipeFD[0][1]); - close(PipeFD[1][0]); - - // Use our pipes as stdin and stdout - if (PipeFD[0][0] != STDIN_FILENO) { - dup2(PipeFD[0][0], STDIN_FILENO); - close(PipeFD[0][0]); - } - if (PipeFD[1][1] != STDOUT_FILENO) { - dup2(PipeFD[1][1], STDOUT_FILENO); - close(PipeFD[1][1]); - } - - // Execute the child process. - char *args[1] = { nullptr }; - int rc = execv(ChildName.c_str(), args); - if (rc != 0) - perror("Error executing child process: "); - } else { - // In the parent... 
- - // Close the child ends of the pipes - close(PipeFD[0][0]); - close(PipeFD[1][1]); - - // Store the parent ends of the pipes - ConnectionData = (void *)new ConnectionData_t(PipeFD[1][0], PipeFD[0][1]); - return true; - } - return false; -} - -bool RPCChannel::createClient() { - // Store the parent ends of the pipes - ConnectionData = (void *)new ConnectionData_t(STDIN_FILENO, STDOUT_FILENO); - return true; -} - -void RPCChannel::Wait() { wait(nullptr); } - -static bool CheckError(int rc, size_t Size, const char *Desc) { - if (rc < 0) { - llvm::errs() << "IO Error: " << Desc << ": " << sys::StrError() << '\n'; - return false; - } else if ((size_t)rc != Size) { - std::string ErrorMsg; - char Number[10] = { 0 }; - ErrorMsg += "Expecting "; - sprintf(Number, "%d", (uint32_t)Size); - ErrorMsg += Number; - ErrorMsg += " bytes, Got "; - sprintf(Number, "%d", rc); - ErrorMsg += Number; - llvm::errs() << "RPC Error: " << Desc << ": " << ErrorMsg << '\n'; - return false; - } - return true; -} - -bool RPCChannel::WriteBytes(const void *Data, size_t Size) { - int rc = write(((ConnectionData_t *)ConnectionData)->OutputPipe, Data, Size); - return CheckError(rc, Size, "WriteBytes"); -} - -bool RPCChannel::ReadBytes(void *Data, size_t Size) { - int rc = read(((ConnectionData_t *)ConnectionData)->InputPipe, Data, Size); - return CheckError(rc, Size, "ReadBytes"); -} - -RPCChannel::~RPCChannel() { - delete static_cast<ConnectionData_t *>(ConnectionData); -} - -} // namespace llvm diff --git a/contrib/llvm/tools/lli/Windows/RPCChannel.inc b/contrib/llvm/tools/lli/Windows/RPCChannel.inc deleted file mode 100644 index 82f2acb..0000000 --- a/contrib/llvm/tools/lli/Windows/RPCChannel.inc +++ /dev/null @@ -1,29 +0,0 @@ -//=- RPCChannel.inc - LLVM out-of-process JIT execution for Windows --=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of the Windows-specific parts of the RPCChannel class -// which executes JITed code in a separate process from where it was built. 
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-bool RPCChannel::createServer() { return false; }
-
-bool RPCChannel::createClient() { return false; }
-
-bool RPCChannel::WriteBytes(const void *Data, size_t Size) { return false; }
-
-bool RPCChannel::ReadBytes(void *Data, size_t Size) { return false; }
-
-void RPCChannel::Wait() {}
-
-RPCChannel::~RPCChannel() {}
-
-} // namespace llvm
diff --git a/contrib/llvm/tools/lli/lli.cpp b/contrib/llvm/tools/lli/lli.cpp
index 9f71406..67e7cbd 100644
--- a/contrib/llvm/tools/lli/lli.cpp
+++ b/contrib/llvm/tools/lli/lli.cpp
@@ -13,11 +13,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/LLVMContext.h"
 #include "OrcLazyJIT.h"
-#include "RemoteMemoryManager.h"
-#include "RemoteTarget.h"
-#include "RemoteTargetExternal.h"
+#include "RemoteJITUtils.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -28,6 +26,7 @@
 #include "llvm/ExecutionEngine/ObjectCache.h"
 #include "llvm/ExecutionEngine/OrcMCJITReplacement.h"
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
@@ -449,7 +448,7 @@ int main(int argc, char **argv, char * const *envp) {
   RTDyldMemoryManager *RTDyldMM = nullptr;
   if (!ForceInterpreter) {
     if (RemoteMCJIT)
-      RTDyldMM = new RemoteMemoryManager();
+      RTDyldMM = new ForwardingMemoryManager();
     else
       RTDyldMM = new SectionMemoryManager();
 
@@ -582,6 +581,25 @@ int main(int argc, char **argv, char * const *envp) {
 
   int Result;
 
+  // Sanity check use of remote-jit: LLI currently only supports use of the
+  // remote JIT on Unix platforms.
+  if (RemoteMCJIT) {
+#ifndef LLVM_ON_UNIX
+    errs() << "Warning: host does not support external remote targets.\n"
+           << "  Defaulting to local execution\n";
+    return -1;
+#else
+    if (ChildExecPath.empty()) {
+      errs() << "-remote-mcjit requires -mcjit-remote-process.\n";
+      exit(1);
+    } else if (!sys::fs::can_execute(ChildExecPath)) {
+      errs() << "Unable to find usable child executable: '" << ChildExecPath
+             << "'\n";
+      return -1;
+    }
+#endif
+  }
+
   if (!RemoteMCJIT) {
     // If the program doesn't explicitly call exit, we will need the Exit
     // function later on to make an explicit call, so get the function now.
@@ -629,66 +647,123 @@ int main(int argc, char **argv, char * const *envp) {
     // Remote target MCJIT doesn't (yet) support static constructors. No reason
     // it couldn't. This is a limitation of the LLI implementation, not the
     // MCJIT itself. FIXME.
-    //
-    RemoteMemoryManager *MM = static_cast<RemoteMemoryManager*>(RTDyldMM);
-    // Everything is prepared now, so lay out our program for the target
-    // address space, assign the section addresses to resolve any relocations,
-    // and send it to the target.
-
-    std::unique_ptr<RemoteTarget> Target;
-    if (!ChildExecPath.empty()) { // Remote execution on a child process
-#ifndef LLVM_ON_UNIX
-      // FIXME: Remove this pointless fallback mode which causes tests to "pass"
-      // on platforms where they should XFAIL.
- errs() << "Warning: host does not support external remote targets.\n" - << " Defaulting to simulated remote execution\n"; - Target.reset(new RemoteTarget); -#else - if (!sys::fs::can_execute(ChildExecPath)) { - errs() << "Unable to find usable child executable: '" << ChildExecPath - << "'\n"; - return -1; - } - Target.reset(new RemoteTargetExternal(ChildExecPath)); -#endif - } else { - // No child process name provided, use simulated remote execution. - Target.reset(new RemoteTarget); + + // Lanch the remote process and get a channel to it. + std::unique_ptr<FDRPCChannel> C = launchRemote(); + if (!C) { + errs() << "Failed to launch remote JIT.\n"; + exit(1); } - // Give the memory manager a pointer to our remote target interface object. - MM->setRemoteTarget(Target.get()); + // Create a remote target client running over the channel. + typedef orc::remote::OrcRemoteTargetClient<orc::remote::RPCChannel> MyRemote; + ErrorOr<MyRemote> R = MyRemote::Create(*C); + if (!R) { + errs() << "Could not create remote: " << R.getError().message() << "\n"; + exit(1); + } - // Create the remote target. - if (!Target->create()) { - errs() << "ERROR: " << Target->getErrorMsg() << "\n"; - return EXIT_FAILURE; + // Create a remote memory manager. + std::unique_ptr<MyRemote::RCMemoryManager> RemoteMM; + if (auto EC = R->createRemoteMemoryManager(RemoteMM)) { + errs() << "Could not create remote memory manager: " << EC.message() << "\n"; + exit(1); } - // Since we're executing in a (at least simulated) remote address space, - // we can't use the ExecutionEngine::runFunctionAsMain(). We have to - // grab the function address directly here and tell the remote target - // to execute the function. - // - // Our memory manager will map generated code into the remote address - // space as it is loaded and copy the bits over during the finalizeMemory - // operation. - // + // Forward MCJIT's memory manager calls to the remote memory manager. + static_cast<ForwardingMemoryManager*>(RTDyldMM)->setMemMgr( + std::move(RemoteMM)); + + // Forward MCJIT's symbol resolution calls to the remote. + static_cast<ForwardingMemoryManager*>(RTDyldMM)->setResolver( + orc::createLambdaResolver( + [&](const std::string &Name) { + orc::TargetAddress Addr = 0; + if (auto EC = R->getSymbolAddress(Addr, Name)) { + errs() << "Failure during symbol lookup: " << EC.message() << "\n"; + exit(1); + } + return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported); + }, + [](const std::string &Name) { return nullptr; } + )); + + // Grab the target address of the JIT'd main function on the remote and call + // it. // FIXME: argv and envp handling. - uint64_t Entry = EE->getFunctionAddress(EntryFn->getName().str()); - + orc::TargetAddress Entry = EE->getFunctionAddress(EntryFn->getName().str()); + EE->finalizeObject(); DEBUG(dbgs() << "Executing '" << EntryFn->getName() << "' at 0x" << format("%llx", Entry) << "\n"); - - if (!Target->executeCode(Entry, Result)) - errs() << "ERROR: " << Target->getErrorMsg() << "\n"; + if (auto EC = R->callIntVoid(Result, Entry)) + errs() << "ERROR: " << EC.message() << "\n"; // Like static constructors, the remote target MCJIT support doesn't handle // this yet. It could. FIXME. - // Stop the remote target - Target->stop(); + // Delete the EE - we need to tear it down *before* we terminate the session + // with the remote, otherwise it'll crash when it tries to release resources + // on a remote that has already been disconnected. 
+ delete EE; + EE = nullptr; + + // Signal the remote target that we're done JITing. + R->terminateSession(); } return Result; } + +std::unique_ptr<FDRPCChannel> launchRemote() { +#ifndef LLVM_ON_UNIX + llvm_unreachable("launchRemote not supported on non-Unix platforms"); +#else + int PipeFD[2][2]; + pid_t ChildPID; + + // Create two pipes. + if (pipe(PipeFD[0]) != 0 || pipe(PipeFD[1]) != 0) + perror("Error creating pipe: "); + + ChildPID = fork(); + + if (ChildPID == 0) { + // In the child... + + // Close the parent ends of the pipes + close(PipeFD[0][1]); + close(PipeFD[1][0]); + + + // Execute the child process. + std::unique_ptr<char[]> ChildPath, ChildIn, ChildOut; + { + ChildPath.reset(new char[ChildExecPath.size() + 1]); + std::copy(ChildExecPath.begin(), ChildExecPath.end(), &ChildPath[0]); + ChildPath[ChildExecPath.size()] = '\0'; + std::string ChildInStr = std::to_string(PipeFD[0][0]); + ChildIn.reset(new char[ChildInStr.size() + 1]); + std::copy(ChildInStr.begin(), ChildInStr.end(), &ChildIn[0]); + ChildIn[ChildInStr.size()] = '\0'; + std::string ChildOutStr = std::to_string(PipeFD[1][1]); + ChildOut.reset(new char[ChildOutStr.size() + 1]); + std::copy(ChildOutStr.begin(), ChildOutStr.end(), &ChildOut[0]); + ChildOut[ChildOutStr.size()] = '\0'; + } + + char * const args[] = { &ChildPath[0], &ChildIn[0], &ChildOut[0], nullptr }; + int rc = execv(ChildExecPath.c_str(), args); + if (rc != 0) + perror("Error executing child process: "); + llvm_unreachable("Error executing child process"); + } + // else we're the parent... + + // Close the child ends of the pipes + close(PipeFD[0][0]); + close(PipeFD[1][1]); + + // Return an RPC channel connected to our end of the pipes. + return llvm::make_unique<FDRPCChannel>(PipeFD[1][0], PipeFD[0][1]); +#endif +} diff --git a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp index 4bc6922..2320511 100644 --- a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp @@ -289,6 +289,7 @@ int main(int argc, char **argv) { CurrentActivity = "loading file '" + InputFilenames[i] + "'"; ErrorOr<std::unique_ptr<LTOModule>> ModuleOrErr = LTOModule::createFromFile(Context, InputFilenames[i].c_str(), Options); + error(ModuleOrErr, "error " + CurrentActivity); std::unique_ptr<LTOModule> &Module = *ModuleOrErr; CurrentActivity = ""; diff --git a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp index f286351..5d21b33 100644 --- a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp +++ b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp @@ -358,13 +358,30 @@ static void printExportTable(const COFFObjectFile *Obj) { uint32_t RVA; if (I->getExportRVA(RVA)) return; - outs() << format(" % 4d %# 8x", Ordinal, RVA); + bool IsForwarder; + if (I->isForwarder(IsForwarder)) + return; + + if (IsForwarder) { + // Export table entries can be used to re-export symbols that + // this COFF file imports from other DLLs. This is rare. + // In most cases IsForwarder is false.
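The launchRemote function above hands the child its pipe ends by number: the descriptors survive exec and their integer values are passed as the first two program arguments. A sketch, with all details assumed rather than taken from the real child-target tool, of what the exec'd child does with those arguments:

#include <cstdio>
#include <cstdlib>

// Hypothetical child side of the launchRemote handshake: the parent leaves
// two pipe descriptors open across exec and passes their numbers as argv[1]
// and argv[2].
int main(int argc, char **argv) {
  if (argc < 3) {
    std::fprintf(stderr, "usage: %s <in-fd> <out-fd>\n", argv[0]);
    return 1;
  }
  int InFD = std::atoi(argv[1]);  // Read requests from the parent here.
  int OutFD = std::atoi(argv[2]); // Write replies to the parent here.
  // ... a real child target would now service RPC calls over InFD/OutFD ...
  (void)InFD;
  (void)OutFD;
  return 0;
}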
+ outs() << format(" % 4d ", Ordinal); + } else { + outs() << format(" % 4d %# 8x", Ordinal, RVA); + } StringRef Name; if (I->getSymbolName(Name)) continue; if (!Name.empty()) outs() << " " << Name; + if (IsForwarder) { + StringRef S; + if (I->getForwardTo(S)) + return; + outs() << " (forwarded to " << S << ")"; + } outs() << "\n"; } } diff --git a/contrib/llvm/tools/llvm-objdump/MachODump.cpp b/contrib/llvm/tools/llvm-objdump/MachODump.cpp index a2f3bc8..258c0b5 100644 --- a/contrib/llvm/tools/llvm-objdump/MachODump.cpp +++ b/contrib/llvm/tools/llvm-objdump/MachODump.cpp @@ -1196,7 +1196,11 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF, PrintSymbolTable(MachOOF); if (UnwindInfo) printMachOUnwindInfo(MachOOF); - if (PrivateHeaders) + if (PrivateHeaders) { + printMachOFileHeader(MachOOF); + printMachOLoadCommands(MachOOF); + } + if (FirstPrivateHeader) printMachOFileHeader(MachOOF); if (ObjcMetaData) printObjcMetaData(MachOOF, !NonVerbose); @@ -1477,10 +1481,8 @@ void llvm::ParseInputMachO(StringRef Filename) { // Attempt to open the binary. ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(Filename); - if (std::error_code EC = BinaryOrErr.getError()) { - errs() << "llvm-objdump: '" << Filename << "': " << EC.message() << ".\n"; - return; - } + if (std::error_code EC = BinaryOrErr.getError()) + report_error(Filename, EC); Binary &Bin = *BinaryOrErr.get().getBinary(); if (Archive *A = dyn_cast<Archive>(&Bin)) { @@ -1649,8 +1651,9 @@ void llvm::ParseInputMachO(StringRef Filename) { } else errs() << "llvm-objdump: '" << Filename << "': " << "Object is not a Mach-O file type.\n"; - } else - report_error(Filename, object_error::invalid_file_type); + return; + } + llvm_unreachable("Input object can't be invalid at this point"); } typedef std::pair<uint64_t, const char *> BindInfoEntry; @@ -8646,31 +8649,40 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype, } } -static void getAndPrintMachHeader(const MachOObjectFile *Obj, - uint32_t &filetype, uint32_t &cputype, - bool verbose) { +static void PrintMachHeader(const MachOObjectFile *Obj, bool verbose) { if (Obj->is64Bit()) { MachO::mach_header_64 H_64; H_64 = Obj->getHeader64(); PrintMachHeader(H_64.magic, H_64.cputype, H_64.cpusubtype, H_64.filetype, H_64.ncmds, H_64.sizeofcmds, H_64.flags, verbose); - filetype = H_64.filetype; - cputype = H_64.cputype; } else { MachO::mach_header H; H = Obj->getHeader(); PrintMachHeader(H.magic, H.cputype, H.cpusubtype, H.filetype, H.ncmds, H.sizeofcmds, H.flags, verbose); - filetype = H.filetype; - cputype = H.cputype; } } void llvm::printMachOFileHeader(const object::ObjectFile *Obj) { const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj); + PrintMachHeader(file, !NonVerbose); +} + +void llvm::printMachOLoadCommands(const object::ObjectFile *Obj) { + const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj); uint32_t filetype = 0; uint32_t cputype = 0; - getAndPrintMachHeader(file, filetype, cputype, !NonVerbose); + if (file->is64Bit()) { + MachO::mach_header_64 H_64; + H_64 = file->getHeader64(); + filetype = H_64.filetype; + cputype = H_64.cputype; + } else { + MachO::mach_header H; + H = file->getHeader(); + filetype = H.filetype; + cputype = H.cputype; + } PrintLoadCommands(file, filetype, cputype, !NonVerbose); } diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp index 22167c7..d5ae5de 100644 --- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ 
b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -165,6 +165,11 @@ cl::opt<bool> llvm::PrivateHeaders("private-headers", cl::desc("Display format specific file headers")); +cl::opt<bool> +llvm::FirstPrivateHeader("private-header", + cl::desc("Display only the first format specific file " + "header")); + static cl::alias PrivateHeadersShort("p", cl::desc("Alias for --private-headers"), cl::aliasopt(PrivateHeaders)); @@ -482,6 +487,23 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj, case ELF::EM_MIPS: res = Target; break; + case ELF::EM_WEBASSEMBLY: + switch (type) { + case ELF::R_WEBASSEMBLY_DATA: { + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + fmt << Target << (addend < 0 ? "" : "+") << addend; + fmt.flush(); + Result.append(fmtbuf.begin(), fmtbuf.end()); + break; + } + case ELF::R_WEBASSEMBLY_FUNCTION: + res = Target; + break; + default: + res = "Unknown"; + } + break; default: res = "Unknown"; } @@ -1478,7 +1500,19 @@ static void printFaultMaps(const ObjectFile *Obj) { outs() << FMP; } -static void printPrivateFileHeader(const ObjectFile *o) { +static void printPrivateFileHeaders(const ObjectFile *o) { + if (o->isELF()) + printELFFileHeader(o); + else if (o->isCOFF()) + printCOFFFileHeader(o); + else if (o->isMachO()) { + printMachOFileHeader(o); + printMachOLoadCommands(o); + } else + report_fatal_error("Invalid/Unsupported object file format"); +} + +static void printFirstPrivateFileHeader(const ObjectFile *o) { if (o->isELF()) printELFFileHeader(o); else if (o->isCOFF()) @@ -1510,7 +1544,9 @@ static void DumpObject(const ObjectFile *o) { if (UnwindInfo) PrintUnwindInfo(o); if (PrivateHeaders) - printPrivateFileHeader(o); + printPrivateFileHeaders(o); + if (FirstPrivateHeader) + printFirstPrivateFileHeader(o); if (ExportsTrie) printExportsTrie(o); if (Rebase) @@ -1601,6 +1637,7 @@ int main(int argc, char **argv) { && !SymbolTable && !UnwindInfo && !PrivateHeaders + && !FirstPrivateHeader && !ExportsTrie && !Rebase && !Bind diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h index 6e8ad6b..60cabbc 100644 --- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h +++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h @@ -31,6 +31,7 @@ extern cl::opt<bool> Disassemble; extern cl::opt<bool> DisassembleAll; extern cl::opt<bool> NoShowRawInsn; extern cl::opt<bool> PrivateHeaders; +extern cl::opt<bool> FirstPrivateHeader; extern cl::opt<bool> ExportsTrie; extern cl::opt<bool> Rebase; extern cl::opt<bool> Bind; @@ -70,6 +71,7 @@ void printELFFileHeader(const object::ObjectFile *o); void printCOFFFileHeader(const object::ObjectFile *o); void printCOFFSymbolTable(const object::COFFObjectFile *o); void printMachOFileHeader(const object::ObjectFile *o); +void printMachOLoadCommands(const object::ObjectFile *o); void printExportsTrie(const object::ObjectFile *o); void printRebaseTable(const object::ObjectFile *o); void printBindTable(const object::ObjectFile *o); diff --git a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp index 516d1cf..d44da0d 100644 --- a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -603,12 +603,14 @@ void COFFDumper::printCodeViewSection(const SectionRef &Section) { // in the line table. The filename string is accessed using double // indirection to the string table subsection using the index subsection. 
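The R_WEBASSEMBLY_DATA branch above prints the target symbol plus a signed addend; the conditional works because a negative value already prints its own minus sign, so only non-negative addends need an explicit '+'. A standalone sketch of the same formatting:

#include <iostream>
#include <sstream>
#include <string>

// Mirrors the addend formatting above: operator<< emits the '-' for
// negative values, so '+' is inserted only for non-negative ones.
static std::string formatReloc(const std::string &Target, long Addend) {
  std::ostringstream OS;
  OS << Target << (Addend < 0 ? "" : "+") << Addend;
  return OS.str();
}

int main() {
  std::cout << formatReloc("env.memcpy", 16) << "\n"; // env.memcpy+16
  std::cout << formatReloc("env.memcpy", -8) << "\n"; // env.memcpy-8
  return 0;
}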
uint32_t OffsetInIndex = DE.getU32(&Offset), - SegmentLength = DE.getU32(&Offset), + NumLines = DE.getU32(&Offset), FullSegmentSize = DE.getU32(&Offset); + uint32_t ColumnOffset = Offset + 8 * NumLines; + DataExtractor ColumnDE(DE.getData(), true, 4); + if (FullSegmentSize != - 12 + 8 * SegmentLength + - (HasColumnInformation ? 4 * SegmentLength : 0)) { + 12 + 8 * NumLines + (HasColumnInformation ? 4 * NumLines : 0)) { error(object_error::parse_failed); return; } @@ -635,29 +637,41 @@ void COFFDumper::printCodeViewSection(const SectionRef &Section) { StringRef Filename(CVStringTable.data() + FilenameOffset); ListScope S(W, "FilenameSegment"); W.printString("Filename", Filename); - for (unsigned J = 0; J != SegmentLength && DE.isValidOffset(Offset); - ++J) { + for (unsigned LineIdx = 0; + LineIdx != NumLines && DE.isValidOffset(Offset); ++LineIdx) { // Then come the (PC, LineNumber) pairs. The line number is stored in the // least significant 31 bits of the respective word in the table. - uint32_t PC = DE.getU32(&Offset), - LineNumber = DE.getU32(&Offset) & 0x7fffffff; + uint32_t PC = DE.getU32(&Offset), LineData = DE.getU32(&Offset); if (PC >= FunctionSize) { error(object_error::parse_failed); return; } char Buffer[32]; format("+0x%X", PC).snprint(Buffer, 32); - W.printNumber(Buffer, LineNumber); - } - if (HasColumnInformation) { - for (unsigned J = 0; J != SegmentLength && DE.isValidOffset(Offset); - ++J) { - uint16_t ColStart = DE.getU16(&Offset); + ListScope PCScope(W, Buffer); + uint32_t LineNumberStart = LineData & COFF::CVL_MaxLineNumber; + uint32_t LineNumberEndDelta = + (LineData >> COFF::CVL_LineNumberStartBits) & + COFF::CVL_LineNumberEndDeltaMask; + bool IsStatement = LineData & COFF::CVL_IsStatement; + W.printNumber("LineNumberStart", LineNumberStart); + W.printNumber("LineNumberEndDelta", LineNumberEndDelta); + W.printBoolean("IsStatement", IsStatement); + if (HasColumnInformation && + ColumnDE.isValidOffsetForDataOfSize(ColumnOffset, 4)) { + uint16_t ColStart = ColumnDE.getU16(&ColumnOffset); W.printNumber("ColStart", ColStart); - uint16_t ColEnd = DE.getU16(&Offset); + uint16_t ColEnd = ColumnDE.getU16(&ColumnOffset); W.printNumber("ColEnd", ColEnd); } } + // Skip over the column data.
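The line-table rewrite above unpacks each 32-bit line-data word into a start line, an end-line delta, and an is-statement flag. A worked decode, assuming the usual CodeView layout (24 bits start, 7 bits delta, 1 flag bit) behind the COFF::CVL_* constants:

#include <cstdint>
#include <cstdio>

int main() {
  // Assumed values of the COFF::CVL_* constants used above.
  const uint32_t CVL_MaxLineNumber = 0x00ffffff;    // Bits 0-23.
  const uint32_t CVL_LineNumberStartBits = 24;
  const uint32_t CVL_LineNumberEndDeltaMask = 0x7f; // Bits 24-30.
  const uint32_t CVL_IsStatement = 0x80000000;      // Bit 31.

  uint32_t LineData = 0x8100002a; // Statement at line 42, end delta 1.
  std::printf("start=%u delta=%u statement=%d\n",
              LineData & CVL_MaxLineNumber,
              (LineData >> CVL_LineNumberStartBits) &
                  CVL_LineNumberEndDeltaMask,
              (LineData & CVL_IsStatement) != 0);
  return 0;
}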
+ if (HasColumnInformation) { + for (unsigned LineIdx = 0; + LineIdx != NumLines && DE.isValidOffset(Offset); ++LineIdx) { + DE.getU32(&Offset); + } + } } } } diff --git a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp index 02397f3..be84f3c 100644 --- a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -708,7 +708,8 @@ static const EnumEntry<unsigned> ElfMachineType[] = { LLVM_READOBJ_ENUM_ENT(ELF, EM_VIDEOCORE5 ), LLVM_READOBJ_ENUM_ENT(ELF, EM_78KOR ), LLVM_READOBJ_ENUM_ENT(ELF, EM_56800EX ), - LLVM_READOBJ_ENUM_ENT(ELF, EM_AMDGPU ) + LLVM_READOBJ_ENUM_ENT(ELF, EM_AMDGPU ), + LLVM_READOBJ_ENUM_ENT(ELF, EM_WEBASSEMBLY ), }; static const EnumEntry<unsigned> ElfSymbolBindings[] = { diff --git a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index e45660c..9503493 100644 --- a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -82,6 +82,10 @@ static cl::opt<bool> ClPrettyPrint("pretty-print", cl::init(false), cl::desc("Make the output more human friendly")); +static cl::opt<int> ClPrintSourceContextLines( + "print-source-context-lines", cl::init(0), + cl::desc("Print N lines of source file context")); + static bool error(std::error_code ec) { if (!ec) return false; @@ -89,18 +93,14 @@ static bool error(std::error_code ec) { return true; } -static bool parseCommand(bool &IsData, std::string &ModuleName, - uint64_t &ModuleOffset) { +static bool parseCommand(StringRef InputString, bool &IsData, + std::string &ModuleName, uint64_t &ModuleOffset) { const char *kDataCmd = "DATA "; const char *kCodeCmd = "CODE "; - const int kMaxInputStringLength = 1024; - const char kDelimiters[] = " \n"; - char InputString[kMaxInputStringLength]; - if (!fgets(InputString, sizeof(InputString), stdin)) - return false; + const char kDelimiters[] = " \n\r"; IsData = false; ModuleName = ""; - char *pos = InputString; + const char *pos = InputString.data(); if (strncmp(pos, kDataCmd, strlen(kDataCmd)) == 0) { IsData = true; pos += strlen(kDataCmd); @@ -117,7 +117,7 @@ static bool parseCommand(bool &IsData, std::string &ModuleName, if (*pos == '"' || *pos == '\'') { char quote = *pos; pos++; - char *end = strchr(pos, quote); + const char *end = strchr(pos, quote); if (!end) return false; ModuleName = std::string(pos, end - pos); @@ -158,13 +158,25 @@ int main(int argc, char **argv) { } LLVMSymbolizer Symbolizer(Opts); - bool IsData = false; - std::string ModuleName; - uint64_t ModuleOffset; DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None, - ClPrettyPrint); + ClPrettyPrint, ClPrintSourceContextLines); + + const int kMaxInputStringLength = 1024; + char InputString[kMaxInputStringLength]; + + while (true) { + if (!fgets(InputString, sizeof(InputString), stdin)) + break; + + bool IsData = false; + std::string ModuleName; + uint64_t ModuleOffset = 0; + if (!parseCommand(StringRef(InputString), IsData, ModuleName, + ModuleOffset)) { + outs() << InputString; + continue; + } - while (parseCommand(IsData, ModuleName, ModuleOffset)) { if (ClPrintAddress) { outs() << "0x"; outs().write_hex(ModuleOffset); diff --git a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp index cc74f9e..cf7cbd9 100644 --- a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp +++ b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp @@ -65,7 +65,7 @@ private:
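The reworked parseCommand in the llvm-symbolizer hunk above now receives the already-read line and handles optionally quoted module names. A reduced sketch of that quoted-token handling, with hypothetical names:

#include <cstring>
#include <string>

// Illustrative: extract a module name that may be wrapped in matching
// single or double quotes, advancing Pos past the consumed token.
static bool takeModuleName(const char *&Pos, std::string &Name) {
  if (*Pos == '"' || *Pos == '\'') {
    char Quote = *Pos++;
    const char *End = std::strchr(Pos, Quote);
    if (!End)
      return false; // Unterminated quoted name.
    Name.assign(Pos, End - Pos);
    Pos = End + 1;
    return true;
  }
  size_t Len = std::strcspn(Pos, " \n\r"); // Same delimiters as above.
  Name.assign(Pos, Len);
  Pos += Len;
  return !Name.empty();
}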
static void PrintCases(std::vector<std::pair<std::string, AsmWriterOperand> > &OpsToPrint, raw_ostream &O) { - O << " case " << OpsToPrint.back().first << ": "; + O << " case " << OpsToPrint.back().first << ":"; AsmWriterOperand TheOp = OpsToPrint.back().second; OpsToPrint.pop_back(); @@ -73,13 +73,13 @@ static void PrintCases(std::vector<std::pair<std::string, // emit a case label for them. for (unsigned i = OpsToPrint.size(); i != 0; --i) if (OpsToPrint[i-1].second == TheOp) { - O << "\n case " << OpsToPrint[i-1].first << ": "; + O << "\n case " << OpsToPrint[i-1].first << ":"; OpsToPrint.erase(OpsToPrint.begin()+i-1); } // Finally, emit the code. - O << TheOp.getCode(); - O << "break;\n"; + O << "\n " << TheOp.getCode(); + O << "\n break;\n"; } @@ -109,9 +109,9 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts, O << " case " << FirstInst.CGI->Namespace << "::" << FirstInst.CGI->TheDef->getName() << ":\n"; - for (unsigned i = 0, e = SimilarInsts.size(); i != e; ++i) - O << " case " << SimilarInsts[i].CGI->Namespace << "::" - << SimilarInsts[i].CGI->TheDef->getName() << ":\n"; + for (const AsmWriterInst &AWI : SimilarInsts) + O << " case " << AWI.CGI->Namespace << "::" + << AWI.CGI->TheDef->getName() << ":\n"; for (unsigned i = 0, e = FirstInst.Operands.size(); i != e; ++i) { if (i != DifferingOperand) { // If the operand is the same for all instructions, just print it. @@ -120,13 +120,13 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts, // If this is the operand that varies between all of the instructions, // emit a switch for just this operand now. O << " switch (MI->getOpcode()) {\n"; + O << " default: llvm_unreachable(\"Unexpected opcode.\");\n"; std::vector<std::pair<std::string, AsmWriterOperand> > OpsToPrint; OpsToPrint.push_back(std::make_pair(FirstInst.CGI->Namespace + "::" + FirstInst.CGI->TheDef->getName(), FirstInst.Operands[i])); - for (unsigned si = 0, e = SimilarInsts.size(); si != e; ++si) { - AsmWriterInst &AWI = SimilarInsts[si]; + for (const AsmWriterInst &AWI : SimilarInsts) { OpsToPrint.push_back(std::make_pair(AWI.CGI->Namespace+"::"+ AWI.CGI->TheDef->getName(), AWI.Operands[i])); @@ -159,11 +159,10 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands, if (!Inst) continue; // PHI, INLINEASM, CFI_INSTRUCTION, etc. - std::string Command; if (Inst->Operands.empty()) continue; // Instruction already done. - Command = " " + Inst->Operands[0].getCode() + "\n"; + std::string Command = " " + Inst->Operands[0].getCode() + "\n"; // Check to see if we already have 'Command' in UniqueOperandCommands. // If not, add it. @@ -178,7 +177,7 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands, } if (!FoundIt) { InstIdxs[i] = UniqueOperandCommands.size(); - UniqueOperandCommands.push_back(Command); + UniqueOperandCommands.push_back(std::move(Command)); InstrsForCase.push_back(Inst->CGI->TheDef->getName()); // This command matches one operand so far. @@ -293,14 +292,13 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { /// OpcodeInfo - This encodes the index of the string to use for the first /// chunk of the output as well as indices used for operand printing. - /// To reduce the number of unhandled cases, we expand the size from 32-bit - /// to 32+16 = 48-bit. std::vector<uint64_t> OpcodeInfo; + const unsigned OpcodeInfoBits = 64; // Add all strings to the string table upfront so it can generate an optimized // representation. 
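FindUniqueOperandCommands above deduplicates the generated printer snippets: each instruction records the index of its command string, and identical commands share one index. The core pattern, reduced to a sketch with hypothetical names:

#include <string>
#include <vector>

// Illustrative interning helper: returns the index of Cmd in Unique, adding
// it only if it has not been seen before - the same idea InstIdxs relies on.
static unsigned internCommand(std::vector<std::string> &Unique,
                              const std::string &Cmd) {
  for (unsigned i = 0, e = Unique.size(); i != e; ++i)
    if (Unique[i] == Cmd)
      return i;
  Unique.push_back(Cmd);
  return Unique.size() - 1;
}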
- for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) { - AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions->at(i)]; + for (const CodeGenInstruction *Inst : *NumberedInstructions) { + AsmWriterInst *AWI = CGIAWIMap[Inst]; if (AWI && AWI->Operands[0].OperandType == AsmWriterOperand::isLiteralTextOperand && @@ -314,8 +312,8 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { StringTable.layout(); unsigned MaxStringIdx = 0; - for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) { - AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions->at(i)]; + for (const CodeGenInstruction *Inst : *NumberedInstructions) { + AsmWriterInst *AWI = CGIAWIMap[Inst]; unsigned Idx; if (!AWI) { // Something not handled by the asmwriter printer. @@ -344,7 +342,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { // To reduce code size, we compactify common instructions into a few bits // in the opcode-indexed table. - unsigned BitsLeft = 64-AsmStrBits; + unsigned BitsLeft = OpcodeInfoBits-AsmStrBits; std::vector<std::vector<std::string>> TableDrivenOperandPrinters; @@ -372,7 +370,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { // Otherwise, we can include this in the initial lookup table. Add it in. for (unsigned i = 0, e = InstIdxs.size(); i != e; ++i) if (InstIdxs[i] != ~0U) { - OpcodeInfo[i] |= (uint64_t)InstIdxs[i] << (64-BitsLeft); + OpcodeInfo[i] |= (uint64_t)InstIdxs[i] << (OpcodeInfoBits-BitsLeft); } BitsLeft -= NumBits; @@ -392,56 +390,55 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { TableDrivenOperandPrinters.push_back(std::move(UniqueOperandCommands)); } - - // We always emit at least one 32-bit table. A second table is emitted if - // more bits are needed. - O<<" static const uint32_t OpInfo[] = {\n"; - for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) { - O << " " << (OpcodeInfo[i] & 0xffffffff) << "U,\t// " - << NumberedInstructions->at(i)->TheDef->getName() << "\n"; - } - // Add a dummy entry so the array init doesn't end with a comma. - O << " 0U\n"; + // Emit the string table itself. + O << " static const char AsmStrs[] = {\n"; + StringTable.emit(O, printChar); O << " };\n\n"; - if (BitsLeft < 32) { - // Add a second OpInfo table only when it is necessary. - // Adjust the type of the second table based on the number of bits needed. - O << " static const uint" - << ((BitsLeft < 16) ? "32" : (BitsLeft < 24) ? "16" : "8") - << "_t OpInfo2[] = {\n"; + // Emit the lookup tables in pieces to minimize wasted bytes. + unsigned BytesNeeded = ((OpcodeInfoBits - BitsLeft) + 7) / 8; + unsigned Table = 0, Shift = 0; + SmallString<128> BitsString; + raw_svector_ostream BitsOS(BitsString); + // If the total bits is more than 32-bits we need to use a 64-bit type. + BitsOS << " uint" << ((BitsLeft < (OpcodeInfoBits - 32)) ? 64 : 32) + << "_t Bits = 0;\n"; + while (BytesNeeded != 0) { + // Figure out how big this table section needs to be, but no bigger than 4. 
+ unsigned TableSize = std::min(1 << Log2_32(BytesNeeded), 4); + BytesNeeded -= TableSize; + TableSize *= 8; // Convert to bits. + uint64_t Mask = (1ULL << TableSize) - 1; + O << " static const uint" << TableSize << "_t OpInfo" << Table + << "[] = {\n"; for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) { - O << " " << (OpcodeInfo[i] >> 32) << "U,\t// " + O << " " << ((OpcodeInfo[i] >> Shift) & Mask) << "U,\t// " << NumberedInstructions->at(i)->TheDef->getName() << "\n"; } - // Add a dummy entry so the array init doesn't end with a comma. - O << " 0U\n"; O << " };\n\n"; + // Emit string to combine the individual table lookups. + BitsOS << " Bits |= "; + // If the total bits is more than 32-bits we need to use a 64-bit type. + if (BitsLeft < (OpcodeInfoBits - 32)) + BitsOS << "(uint64_t)"; + BitsOS << "OpInfo" << Table << "[MI->getOpcode()] << " << Shift << ";\n"; + // Prepare the shift for the next iteration and increment the table count. + Shift += TableSize; + ++Table; } - // Emit the string itself. - O << " static const char AsmStrs[] = {\n"; - StringTable.emit(O, printChar); - O << " };\n\n"; - + // Emit the initial tab character. O << " O << \"\\t\";\n\n"; O << " // Emit the opcode for the instruction.\n"; - if (BitsLeft < 32) { - // If we have two tables then we need to perform two lookups and combine - // the results into a single 64-bit value. - O << " uint64_t Bits1 = OpInfo[MI->getOpcode()];\n" - << " uint64_t Bits2 = OpInfo2[MI->getOpcode()];\n" - << " uint64_t Bits = (Bits2 << 32) | Bits1;\n"; - } else { - // If only one table is used we just need to perform a single lookup. - O << " uint32_t Bits = OpInfo[MI->getOpcode()];\n"; - } + O << BitsString; + + // Emit the starting string. O << " assert(Bits != 0 && \"Cannot print this instruction.\");\n" << " O << AsmStrs+(Bits & " << (1 << AsmStrBits)-1 << ")-1;\n\n"; // Output the table driven operand information. - BitsLeft = 64-AsmStrBits; + BitsLeft = OpcodeInfoBits-AsmStrBits; for (unsigned i = 0, e = TableDrivenOperandPrinters.size(); i != e; ++i) { std::vector<std::string> &Commands = TableDrivenOperandPrinters[i]; @@ -457,7 +454,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { if (Commands.size() == 2) { // Emit two possibilities with if/else. O << " if ((Bits >> " - << (64-BitsLeft) << ") & " + << (OpcodeInfoBits-BitsLeft) << ") & " << ((1 << NumBits)-1) << ") {\n" << Commands[1] << " } else {\n" @@ -468,14 +465,14 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { O << Commands[0] << "\n\n"; } else { O << " switch ((Bits >> " - << (64-BitsLeft) << ") & " + << (OpcodeInfoBits-BitsLeft) << ") & " << ((1 << NumBits)-1) << ") {\n" << " default: llvm_unreachable(\"Invalid command number.\");\n"; // Print out all the cases. - for (unsigned i = 0, e = Commands.size(); i != e; ++i) { - O << " case " << i << ":\n"; - O << Commands[i]; + for (unsigned j = 0, e = Commands.size(); j != e; ++j) { + O << " case " << j << ":\n"; + O << Commands[j]; O << " break;\n"; } O << " }\n\n"; @@ -484,14 +481,11 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { // Okay, delete instructions with no operand info left. - for (unsigned i = 0, e = Instructions.size(); i != e; ++i) { - // Entire instruction has been emitted?
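The loop above emits as many 8/16/32-bit OpInfo tables as the used bits actually require, and the generated printer ORs them back together at increasing shifts. A model of that round trip for a hypothetical 40-bit encoding:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical 40-bit OpcodeInfo entry: low 32 bits in one table, the
  // remaining 8 bits in a second, mirroring the emitted OpInfo0/OpInfo1.
  uint64_t OpcodeInfo = 0x9a12345678ULL;
  uint32_t OpInfo0 = static_cast<uint32_t>(OpcodeInfo & 0xffffffffULL);
  uint8_t OpInfo1 = static_cast<uint8_t>((OpcodeInfo >> 32) & 0xffULL);

  // The generated code does exactly this: Bits |= Table[Opcode] << Shift.
  uint64_t Bits = 0;
  Bits |= OpInfo0;
  Bits |= static_cast<uint64_t>(OpInfo1) << 32;

  std::printf("%s\n", Bits == OpcodeInfo ? "round-trips" : "mismatch");
  return 0;
}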
- AsmWriterInst &Inst = Instructions[i]; - if (Inst.Operands.empty()) { - Instructions.erase(Instructions.begin()+i); - --i; --e; - } - } + auto I = std::remove_if(Instructions.begin(), Instructions.end(), + [](AsmWriterInst &Inst) { + return Inst.Operands.empty(); + }); + Instructions.erase(I, Instructions.end()); // Because this is a vector, we want to emit from the end. Reverse all of the @@ -499,18 +493,18 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { std::reverse(Instructions.begin(), Instructions.end()); - // Now that we've emitted all of the operand info that fit into 32 bits, emit + // Now that we've emitted all of the operand info that fit into 64 bits, emit // information for those instructions that are left. This is a less dense - // encoding, but we expect the main 32-bit table to handle the majority of + // encoding, but we expect the main 64-bit table to handle the majority of // instructions. if (!Instructions.empty()) { // Find the opcode # of inline asm. O << " switch (MI->getOpcode()) {\n"; + O << " default: llvm_unreachable(\"Unexpected opcode.\");\n"; while (!Instructions.empty()) EmitInstructions(Instructions, O); O << " }\n"; - O << " return;\n"; } O << "}\n"; @@ -603,16 +597,16 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) { << "\n"; if (hasAltNames) { - for (unsigned i = 0, e = AltNameIndices.size(); i < e; ++i) - emitRegisterNameString(O, AltNameIndices[i]->getName(), Registers); + for (const Record *R : AltNameIndices) + emitRegisterNameString(O, R->getName(), Registers); } else emitRegisterNameString(O, "", Registers); if (hasAltNames) { O << " switch(AltIdx) {\n" << " default: llvm_unreachable(\"Invalid register alt name index!\");\n"; - for (unsigned i = 0, e = AltNameIndices.size(); i < e; ++i) { - std::string AltName(AltNameIndices[i]->getName()); + for (const Record *R : AltNameIndices) { + std::string AltName(R->getName()); std::string Prefix = !Namespace.empty() ? Namespace + "::" : ""; O << " case " << Prefix << AltName << ":\n" << " assert(*(AsmStrs" << AltName << "+RegAsmOffset" @@ -799,9 +793,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { typedef std::set<std::pair<CodeGenInstAlias, int>, AliasPriorityComparator> AliasWithPriority; std::map<std::string, AliasWithPriority> AliasMap; - for (std::vector<Record*>::iterator - I = AllInstAliases.begin(), E = AllInstAliases.end(); I != E; ++I) { - const Record *R = *I; + for (Record *R : AllInstAliases) { int Priority = R->getValueAsInt("EmitPriority"); if (Priority < 1) continue; // Aliases with priority 0 are never emitted. 
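The std::remove_if/erase pair adopted above is the standard idiom for filtering a vector in place, avoiding the index bookkeeping of the deleted manual loop. A minimal demonstration:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> V = {1, 0, 2, 0, 3};
  // remove_if compacts the kept elements to the front and returns the new
  // logical end; erase then drops the leftover tail in one call.
  V.erase(std::remove_if(V.begin(), V.end(), [](int X) { return X == 0; }),
          V.end());
  assert((V == std::vector<int>{1, 2, 3}));
  return 0;
}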
@@ -809,7 +801,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { const DagInit *DI = R->getValueAsDag("ResultInst"); const DefInit *Op = cast<DefInit>(DI->getOperator()); AliasMap[getQualifiedName(Op->getDef())].insert( - std::make_pair(CodeGenInstAlias(*I, Variant, Target), Priority)); + std::make_pair(CodeGenInstAlias(R, Variant, Target), Priority)); } // A map of which conditions need to be met for each instruction operand @@ -977,9 +969,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { CasesO.indent(2) << "case " << Entry.first << ":\n"; - for (std::vector<IAPrinter*>::iterator - II = UniqueIAPs.begin(), IE = UniqueIAPs.end(); II != IE; ++II) { - IAPrinter *IAP = *II; + for (IAPrinter *IAP : UniqueIAPs) { CasesO.indent(4); IAP->print(CasesO); CasesO << '\n'; @@ -1098,10 +1088,11 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) { Record *AsmWriter = Target.getAsmWriter(); + unsigned Variant = AsmWriter->getValueAsInt("Variant"); + unsigned PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget"); for (const CodeGenInstruction *I : Target.instructions()) if (!I->AsmString.empty() && I->TheDef->getName() != "PHI") - Instructions.emplace_back(*I, AsmWriter->getValueAsInt("Variant"), - AsmWriter->getValueAsInt("PassSubtarget")); + Instructions.emplace_back(*I, Variant, PassSubtarget); // Get the instruction numbering. NumberedInstructions = &Target.getInstructionsByEnumValue(); @@ -1109,8 +1100,8 @@ AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) { // Compute the CodeGenInstruction -> AsmWriterInst mapping. Note that not // all machine instructions are necessarily being printed, so there may be // target instructions not in this map. - for (unsigned i = 0, e = Instructions.size(); i != e; ++i) - CGIAWIMap.insert(std::make_pair(Instructions[i].CGI, &Instructions[i])); + for (AsmWriterInst &AWI : Instructions) + CGIAWIMap.insert(std::make_pair(AWI.CGI, &AWI)); } void AsmWriterEmitter::run(raw_ostream &O) { diff --git a/contrib/llvm/utils/TableGen/AsmWriterInst.cpp b/contrib/llvm/utils/TableGen/AsmWriterInst.cpp index 9541887..5b09765 100644 --- a/contrib/llvm/utils/TableGen/AsmWriterInst.cpp +++ b/contrib/llvm/utils/TableGen/AsmWriterInst.cpp @@ -29,8 +29,8 @@ static bool isIdentChar(char C) { std::string AsmWriterOperand::getCode() const { if (OperandType == isLiteralTextOperand) { if (Str.size() == 1) - return "O << '" + Str + "'; "; - return "O << \"" + Str + "\"; "; + return "O << '" + Str + "';"; + return "O << \"" + Str + "\";"; } if (OperandType == isLiteralStatementOperand) @@ -44,7 +44,7 @@ std::string AsmWriterOperand::getCode() const { Result += ", O"; if (!MiModifier.empty()) Result += ", \"" + MiModifier + '"'; - return Result + "); "; + return Result + ");"; } /// ParseAsmString - Parse the specified Instruction's AsmString into this